library(knitr)
library(ggplot2)
library(stringr)
suppressMessages(library(dplyr))
library(sp)
suppressMessages(library(rgdal))
suppressMessages(library(dismo))
suppressMessages(library(stargazer))
library(leaflet)
library(XML)
suppressMessages(library(maptools))
library(automap)
suppressMessages(library(RStata))
suppressMessages(library(fields))
library(gstat)
library(htmltools)
suppressMessages(library(Matching))
wd <- "/Users/mlowes/drive/optimized_agronomy/soil/soil_health_study/data/ke baseline"
dd <- paste(wd, "data", sep = "/")
od <- paste(wd, 'output', sep = "/")
load(paste("/Users/mlowes/drive/R_help/3_analyze/regressions", "regression_functions.Rdata", sep = "/"))
d <- read.csv(paste(dd, "KE SHS Baseline Survey Data.csv", sep = '/'))
soil <- read.csv(paste(dd, "Kenya_SHS_results.csv", sep = "/"))
cnP <- read.csv(paste(dd, "Kenya_SHS_N_C.csv", sep = "/"))
Identifiers <- read.csv(paste(dd, "Combined meta with SSN.csv", sep = '/'), stringsAsFactors = F)
First step is merging the data. The ids in the Identifiers data need to be adjusted so that they match the ids in the ids in CommCare
Identifiers$oaf.ID <- tolower(Identifiers$oaf.ID)
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega North"] <- "Kakamega B (North)"
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega South"] <- "Kakamega (South)"
d$id.DistrictName <- as.character(d$id.DistrictName)
d$id.DistrictName[d$id.DistrictName=="GemLundha"] <- "Gem"
d$id.DistrictName[d$id.DistrictName=="Lug Ari"] <- "Lugari"
d$id.Soil_Sample_Id <- tolower(d$id.Soil_Sample_Id)
Identifiers$sample_id <- ifelse(grepl("c", Identifiers$oaf.ID), Identifiers$oaf.ID, paste(tolower(Identifiers$DISTRICT), Identifiers$oaf.ID, sep = ""))
Identifiers$sample_id <- gsub(" ", "", Identifiers$sample_id)
d$id.Soil_Sample_Id <- gsub(" ", "", d$id.Soil_Sample_Id)
table(d$id.Soil_Sample_Id %in% Identifiers$sample_id)
##
## FALSE TRUE
## 35 2033
We’re currently missing 35 matches. Investigate why we’re missing these. Below are the ids that do not appear in the Identifiers data but appear in the CommCare survey data:
d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]
## [1] "busia33787" "rachuonyo40077"
## [3] "kakamegab(north)15878" "rongo33010"
## [5] "c1136" "rongo35982"
## [7] "gem45918" "gem64358"
## [9] "c857" "rachuonyo42533"
## [11] "rachuonyo39005" "kakamegab(north)6413"
## [13] "kakamegab(north)61463" "butere49984"
## [15] "kisii43004" "gem57424"
## [17] "c703" "c290"
## [19] "busia34175" "c241"
## [21] "webuye52641" "c863"
## [23] "c706" "migori17557"
## [25] "migori14465" "rachuonyo16330"
## [27] "matete54668" "suneka30533"
## [29] "c1121" "suneka16515"
## [31] "lugari52284" "kakamegab(north)63510"
## [33] "gem59393" "rachuonyo22477"
## [35] "c1176"
missing <- d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]
Issues seem to be:
missingAll <- d[!d$id.Soil_Sample_Id %in% Identifiers$sample_id,]
write.csv(missingAll, paste(od, "missing_matches.csv", sep = "/"), row.names = F)
For the ids that should have an OAF id but don’t see if that oaf id exists in the Identifiers data.
missingId <- str_extract_all(missing,"\\(?[0-9,.]+\\)?")
missingId <- unlist(missingId)
table(missingId %in% Identifiers$oaf.ID)
##
## FALSE TRUE
## 33 2
missingId[missingId %in% Identifiers$oaf.ID]
## [1] "61463" "63510"
These are the ids that appear in the Identifiers data but not in the CommCare surey data:
Identifiers$sample_id[!Identifiers$sample_id %in% d$id.Soil_Sample_Id]
## [1] "busia33789" "kakamegab(north)15873"
## [3] "butere44824" "butere56493"
## [5] "butere59600" "gem4598"
## [7] "gem37424" "gem64558"
## [9] "c328" "c280"
## [11] "webuye1233" "rachuonyo224777"
## [13] "rachuonyo38459" "rachuonyo163330"
## [15] "rachuonyo425333" "rachuonyo39009"
## [17] "rachuonyo400777" "suneka16575"
## [19] "c1174" "rongo3310"
## [21] "kisii3004" "c627"
## [23] "migori7557" "migori14342"
## [25] "migori8936" "migori14159"
## [27] "migori7179" "migori17315"
## [29] "migori13503" "c901"
## [31] "c894" "c892"
## [33] "c893" "c861"
## [35] "c862" "c895"
## [37] "c896"
Come back to this. I don’t want to simply drop observations but we want to be working with full data for summaries and regressions.
d <- merge(d, Identifiers[,c("SSN", "sample_id")], by.x="id.Soil_Sample_Id", by.y="sample_id", all.x=TRUE)
d <- d[!is.na(d$SSN),]
table(d$SSN %in% soil$SSN)
##
## FALSE TRUE
## 1 2035
d <- merge(d, soil[,c(1,3,6:24)], by="SSN", all.x=TRUE)
Merge C and N predictions
table(d$SSN %in% cnP$SSN)
##
## FALSE TRUE
## 1 2035
d <- merge(d, cnP, by="SSN", all.x=TRUE)
names(d)[names(d)=="OC_PLS1" | names(d)== "TN_PLS1"] <- c("Total.C", "Total.N")
# take out weird CommCare stuff
d[d=="---"] <- NA
names(d) <- gsub("id.", "", names(d))
names(d) <- gsub("livestock.", "", names(d))
# seedtype to yield
names(d)[24:27] <- gsub("plot_information.", "", names(d)[24:27])
#inputs
names(d)[34:65] <- gsub("plot_information.", "", names(d)[34:65])
#intercrop
names(d)[28:33] <- gsub("plot_information.plot_information.","intercrop.", names(d)[28:33])
names(d) <- gsub("historical.", "", names(d))
names(d) <- gsub("field_information.", "", names(d))
Recode variables to numeric
varlist <- c("seedkgs", "yield", "intercrop.seedkgs", "intercrop.yield",
"inputs.dap_kg", "inputs.can_kg", "inputs.npk_kg", "inputs.urea_kg", "inputs.dap_kg_intercrop", "inputs.can_kg_intercrop",
"inputs.npk_kg_intercrop", "inputs.urea_kg_intercrop",
"inputs.lime_kg")
d[, varlist] <- sapply(d[,varlist], as.numeric)
d <- cbind(d, str_split_fixed(d$gps, " ", n=4))
names(d)[names(d)=="1" |names(d)== "2" | names(d)== "3" | names(d)== "4"] <- c("lat", "lon", "alt", "precision")
d[,c("lat", "lon", "alt", "precision")] <- sapply(
d[,c("lat", "lon", "alt", "precision")], function(x){as.numeric(as.character(x))})
Count the number of missing values in all soil variables
soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
"pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn", "Total.C",
"Total.N")
naCount <- sapply(d[,soilVars], function(x){
return(sum(is.na(x)))
})
naCount
## C.E.C Cu EC Exch.Al Hp K Mg Mn pH
## 1 1 1 1 1 1 1 1 1
## B Ca Fe Na P PSI S Zn Total.C
## 1 1 1 1 1 1 1 1 1
## Total.N
## 1
Drop data with missing soil data - it’s not useful for later summaries and regressions
note: Come back and understand why we don’t have perfect matches.
d <- d[-which(is.na(d$Ca)),]
summary(d[,soilVars])
## C.E.C Cu EC Exch.Al
## Min. : 2.614 Min. : 0.8157 Min. : 22.90 Min. :0.002227
## 1st Qu.: 7.047 1st Qu.: 2.1852 1st Qu.: 38.72 1st Qu.:0.049040
## Median : 9.567 Median : 3.0869 Median : 46.07 Median :0.085674
## Mean :10.420 Mean : 3.2745 Mean : 47.52 Mean :0.128068
## 3rd Qu.:13.088 3rd Qu.: 4.2528 3rd Qu.: 54.15 3rd Qu.:0.142481
## Max. :40.192 Max. :10.5906 Max. :101.09 Max. :2.720619
## Hp K Mg Mn
## Min. :0.03122 Min. : 35.39 Min. : 31.16 Min. : 40.01
## 1st Qu.:0.22529 1st Qu.: 94.18 1st Qu.:108.72 1st Qu.:111.81
## Median :0.30606 Median :126.93 Median :157.62 Median :154.54
## Mean :0.35452 Mean :142.42 Mean :181.78 Mean :157.50
## 3rd Qu.:0.42156 3rd Qu.:182.19 3rd Qu.:239.80 3rd Qu.:198.95
## Max. :3.32301 Max. :550.83 Max. :579.63 Max. :340.54
## pH B Ca Fe
## Min. :4.379 Min. :0.01646 Min. : 138.8 Min. : 58.5
## 1st Qu.:5.573 1st Qu.:0.09394 1st Qu.: 628.2 1st Qu.:112.9
## Median :5.858 Median :0.14120 Median : 916.3 Median :132.2
## Mean :5.876 Mean :0.15279 Mean :1064.3 Mean :136.7
## 3rd Qu.:6.149 3rd Qu.:0.19316 3rd Qu.:1303.7 3rd Qu.:156.7
## Max. :8.784 Max. :0.67119 Max. :8246.8 Max. :345.6
## Na P PSI S
## Min. :17.59 Min. : 0.05591 Min. : 21.14 Min. : 5.931
## 1st Qu.:27.85 1st Qu.: 3.53380 1st Qu.: 87.12 1st Qu.:10.754
## Median :30.76 Median : 7.06593 Median :119.79 Median :12.646
## Mean :31.53 Mean : 11.98246 Mean :125.76 Mean :13.018
## 3rd Qu.:34.56 3rd Qu.: 13.34762 3rd Qu.:157.05 3rd Qu.:14.852
## Max. :58.39 Max. :198.04271 Max. :369.55 Max. :29.903
## Zn Total.C Total.N
## Min. : 1.682 Min. :0.7858 Min. :0.05101
## 1st Qu.: 3.639 1st Qu.:1.7646 1st Qu.:0.12616
## Median : 4.511 Median :2.1194 Median :0.15722
## Mean : 4.757 Mean :2.1665 Mean :0.15790
## 3rd Qu.: 5.653 3rd Qu.:2.5581 3rd Qu.:0.18766
## Max. :12.462 Max. :4.5113 Max. :0.31267
Check SiteName variable
#table(d$SiteName) many misspellings
d$SiteName <- tolower(d$SiteName)
for(i in 1:length(soilVars)){
print(
ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) +
geom_boxplot() +
labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
)
}
pdf(paste(od, "ke_baseline_soil.pdf", sep = "/"), width=11, height=8.5)
print(
ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) +
geom_boxplot() +
labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
)
dev.off()
## quartz_off_screen
## 2
There are biologically predictable relationships between soil chemical characteristics. For instance, we expect Ca and Mg to move in the same direction and be positively correlated with pH. If we had Aluminum as an outcome, we’d expect pH to be negatively correlated with soluable aluminum. Let’s look quickly to confirm if those relationships are present:
rel1 <- ggplot(d, aes(x=Ca, y=Mg)) + geom_point() +
stat_smooth(method = "loess") +
labs(x = "Calcium", y= "Magnesium", title="Calcium/Magnesium relationship")
print(rel1)
rel2 <- ggplot(d, aes(x=pH, y=Ca)) + geom_point() +
stat_smooth(method = "loess") +
labs(x = "pH", y="Calcium", title = "pH and Calcium relationship")
print(rel2)
rel3 <- ggplot(d, aes(x=pH, y=Exch.Al)) + geom_point() +
stat_smooth(method = "loess") +
labs(x = "pH", y="Exchangable Aluminum", title = "pH and Exch.Al relationship")
print(rel3)
rel4 <- ggplot(d, aes(x=Total.C, y=Total.N)) + geom_point() +
stat_smooth(method="loess") +
labs(x = "Total Carbon", y="Total Nitrogen", title = "Carbon and Nitrogen relationship")
pdf(paste(od, "ke_baseline_soil_relationships.pdf", sep = "/"), width=11, height=8.5)
print(rel1)
print(rel2)
print(rel3)
print(rel4)
dev.off()
## quartz_off_screen
## 2
Interpretation We see the predictable soil relationships we expected. This indicates that internally, our soil data are behaving in compliance with biological principles.
Save clean demographic and soil data to external file
write.csv(d, file=paste(dd, "shs ke baseline.csv", sep = "/"))
save(d, file=paste(dd, "shs ke baseline.Rdata", sep = "/"))
count <- d %>% group_by(DistrictName) %>%
dplyr::summarize(
t.count = sum(ifelse(oaf==1,1,0)),
c.count = sum(ifelse(oaf==0,1,0)),
total = n()
) %>% ungroup()
count <- as.data.frame(count)
write.csv(count, file=paste(od, "final ke sample breakdown.csv", sep="/"), row.names=F)
as.data.frame(count)
## DistrictName t.count c.count total
## 1 Busia 43 45 88
## 2 Butere 116 119 235
## 3 Chwele 40 39 79
## 4 Gem 134 135 269
## 5 Kakamega (South) 91 89 180
## 6 Kakamega B (North) 82 84 166
## 7 Kisii 47 50 97
## 8 Lugari 88 86 174
## 9 Matete 46 47 93
## 10 Migori 86 85 171
## 11 Rachuonyo 84 90 174
## 12 Rongo 78 78 156
## 13 Suneka 34 35 69
## 14 Webuye 42 42 84
d$valley <- ifelse(d$location=="valley", 1,0)
d$hilltop <- ifelse(d$location=="hilltop", 1,0)
d$hillside <- ifelse(d$location=="hillside", 1,0)
sort(prop.table(table(d$plot_information.maincrop, useNA = 'ifany')))
##
## sukuma groundnuts millet sweetpotatoes beans
## 0.001965602 0.002948403 0.002948403 0.003931204 0.004422604
## sorghum sugar other fallow maize
## 0.005405405 0.006879607 0.017199017 0.023095823 0.931203931
d$crop.maize <- ifelse(d$plot_information.maincrop=="maize", 1,0)
Notes from Kalie: We should be dividing input quantity by land size. 100 kgs of inputs is the soft ceiling for OAF fields as we didn’t offer more than 100 kgs of fertilizer per acre. It’s also impossible for an OAF field to be smaller than 0.25 acres.
ggplot(d, aes(x=field_size, y=inputs.dap_kg)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).
This would mean however that if I want to get the actual amount they applied to their field to see that that quantity also makes sense, I would multiply by the size of the field.
d$dap.check <- d$inputs.dap_kg*d$field_size
ggplot(d, aes(x=dap.check)) + geom_density()
## Warning: Removed 479 rows containing non-finite values (stat_density).
Multiplying certainly keeps us in a more normal range. The scatter plot looks okay as well. The variance in per acre application rate increases with the size of the plot.
ggplot(d, aes(x=field_size, y=dap.check)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).
However, if the input quatity variable is already per plot size, as the codebook suggests, I’d want to divide by the size of the plot to get the per acre application rate. This is what Kalie suggests
d$dap.acre <- d$inputs.dap_kg/d$field_size
d$can.acre <- d$inputs.can_kg/d$field_size
d$npk.acre <- d$inputs.npk_kg/d$field_size
d$urea.acre <- d$inputs.urea_kg/d$field_size
d$inputs.compost <- ifelse(d$inputs.compost==-99, NA, d$inputs.compost)
d$compost.acre <- d$inputs.compost/d$field_size
acreInputs <- names(d)[grep("acre", names(d))]
Print out the calculation to make certain I’m doing this right:
#head(d[, c("inputs.dap_kg", "field_size", "dap.acre")])
d[which.max(d$dap.acre), c("inputs.dap_kg", "field_size", "dap.acre", "oaf")]
## inputs.dap_kg field_size dap.acre oaf
## 831 47 0.0625 752 0
for(i in 1:length(acreInputs)){
print(
ggplot(data=d, aes(x=d[,acreInputs[i]])) +
geom_histogram() +
facet_wrap(~ oaf, scales='free') +
labs(x=acreInputs[i], title = paste("Kenya input quantity per acre - ", acreInputs[i], sep = ""))
)
}
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 479 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1099 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1999 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1822 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 44 rows containing non-finite values (stat_bin).
These values are totally unreasonable! These per acre (and per hectare) values seem too big. Let’s look at the per acre rates by field size
for(i in 1:length(acreInputs)){
print(
ggplot(d, aes(x = field_size, y=d[, acreInputs[i]])) +
geom_point() +
labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
)
}
## Warning: Removed 479 rows containing missing values (geom_point).
## Warning: Removed 1099 rows containing missing values (geom_point).
## Warning: Removed 1999 rows containing missing values (geom_point).
## Warning: Removed 1822 rows containing missing values (geom_point).
## Warning: Removed 44 rows containing missing values (geom_point).
d$field_size.adj <- ifelse(d$oaf==1 & d$field_size<0.25, 0.25, d$field_size)
# drop really small field sizes >> they're inflating the application rates
d$field_size.adj <- ifelse(d$field_size<0.125, 0.125, d$field_size.adj)
# updated version of the input/acre variables - winsoring values to 100 kg.
d$dap.acre <- d$inputs.dap_kg/d$field_size.adj
d$dap.acre <- ifelse(d$dap.acre>100, 100, d$dap.acre)
d$can.acre <- d$inputs.can_kg/d$field_size.adj
d$can.acre <- ifelse(d$can.acre>100, 100, d$can.acre)
d$npk.acre <- d$inputs.npk_kg/d$field_size.adj
d$npk.acre <- ifelse(d$npk.acre>100, 100, d$npk.acre)
d$urea.acre <- d$inputs.urea_kg/d$field_size.adj
d$urea.acre <- ifelse(d$urea.acre>100, 100, d$urea.acre)
d$compost.acre <- d$inputs.compost/d$field_size.adj
Clean all per acre variables
summary(d[,acreInputs])
## dap.acre can.acre npk.acre urea.acre
## Min. : 2.50 Min. : 1.50 Min. : 1.50 Min. : 2.00
## 1st Qu.: 30.67 1st Qu.: 25.33 1st Qu.: 10.30 1st Qu.: 22.00
## Median : 46.00 Median : 42.00 Median : 19.00 Median : 34.00
## Mean : 53.76 Mean : 46.49 Mean : 29.44 Mean : 42.07
## 3rd Qu.: 92.00 3rd Qu.: 68.00 3rd Qu.: 40.00 3rd Qu.: 60.00
## Max. :100.00 Max. :100.00 Max. :100.00 Max. :100.00
## NA's :479 NA's :1099 NA's :1999 NA's :1822
## compost.acre
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 0.0
## Mean : 240.5
## 3rd Qu.: 120.0
## Max. :53333.3
## NA's :44
for(i in 1:length(acreInputs)){
print(
ggplot(d, aes(x = field_size.adj, y=d[, acreInputs[i]], color=as.factor(oaf))) +
geom_point() +
#facet_wrap(~oaf) +
labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
)
}
## Warning: Removed 479 rows containing missing values (geom_point).
## Warning: Removed 1099 rows containing missing values (geom_point).
## Warning: Removed 1999 rows containing missing values (geom_point).
## Warning: Removed 1822 rows containing missing values (geom_point).
## Warning: Removed 44 rows containing missing values (geom_point).
Kim says: While the values vary far more than we see in typical M&E data, we don’t have a good reference point for fertilizer application rates during the short rains, the period about which we asked. It’s conceivable that the fertilizer/acre quantity varies this much, even among OAF farmers. I’m going to go with that for now.
pastYears <- names(d)[grep("_5", names(d))]
summary(d[,pastYears])
## chemical_fertilizer_5 compost_fertilizer_5 lime_fertilizer_5
## Min. :-99.000 Min. :-99.000 Min. :-99.0000
## 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 5.000 Median : 1.000 Median : 0.0000
## Mean : 3.211 Mean : 1.676 Mean : -0.1169
## 3rd Qu.: 5.000 3rd Qu.: 5.000 3rd Qu.: 0.0000
## Max. : 5.000 Max. : 5.000 Max. : 5.0000
## fertilizer_5 legum_maincrop_5 legum_intercrop_5
## Min. :-99.00000 Min. :-99.0000 Min. :-99.00
## 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 0.00000 Median : 0.0000 Median : 3.00
## Mean : -0.09042 Mean : 0.1032 Mean : 2.64
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.: 5.00
## Max. : 5.00000 Max. : 5.0000 Max. : 5.00
# replace -99 with NA
d[,pastYears] <- sapply(d[,pastYears], function(x){
ifelse(x==-99, NA, x)
})
Generate tenure vs. pH and tenure vs. fertilizer seasons scatter plots:
d$seasons_oaf <- ifelse(d$seasons_oaf>8, NA, d$seasons_oaf)
tenure.ph <- ggplot(d, aes(x= jitter(seasons_oaf), y=pH)) + geom_point() +
geom_smooth(method='loess') +
#geom_smooth(method='lm', color="red") +
#scale_x_discrete(=seq(0,8,1)) +
labs(x = "OAF Tenure", y = "Soil pH", title = "Kenya OAF Tenure vs. soil pH")
tenure.ph
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
pdf(paste(od, "tenure vs ph.pdf", sep = "/"), width=11, height=8.5)
print(tenure.ph)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
d$seasons_breaks <- ifelse(d$seasons_oaf<2, 1, ifelse(d$seasons_oaf>=2 & d$seasons_oaf<5,2,3))
tenure.breaks <- ggplot(data=subset(d, !is.na(d$seasons_breaks)), aes(x=jitter(seasons_oaf), y=pH)) +
stat_smooth(method = "lm", se=FALSE, color="blue") +
geom_point() +
labs(title = "Kenya OAF Tenure vs. soil pH",
x = "Seasons with OAF", y = "Soil pH") +
facet_wrap(~ seasons_breaks, scales="free_y")
tenure.breaks
tenure.fertilizer <- ggplot(d, aes(x= jitter(seasons_oaf), y=jitter(chemical_fertilizer_5))) + geom_point() +
geom_smooth(method='loess') +
labs(x = "OAF Tenure", y = "Years of Fertilizer in past 5 years", title = "Kenya OAF Tenure vs. soil pH")
tenure.fertilizer
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
pdf(paste(od, "tenure vs fertilizer.pdf", sep = "/"), width=11, height=8.5)
print(tenure.fertilizer)
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen
## 2
d$own.cows <- ifelse(d$cows>0 & !is.na(d$cows), 1,0)
d$own.goats <- ifelse(d$goats>0 & !is.na(d$goats), 1,0)
d$own.chickens <- ifelse(d$chickens>0 & !is.na(d$chickens), 1,0)
d$own.pigs <- ifelse(d$pigs>0 & !is.na(d$pigs), 1,0)
d$own.sheep <- ifelse(d$sheep>0 & !is.na(d$sheep), 1,0)
d$clay_soil <- ifelse(d$soil_type=="clay", 1,0)
d$loam_soil <- ifelse(d$soil_type=="loam", 1,0)
d$sandy_soil <- ifelse(d$soil_type=="sandy", 1,0)
Let’s see how balanced our farmers are in terms of demographic variables. One Acre Fund farmers were selected based on (list criteria) and control farmers in the same area tha fit the same criteria were also selected. No matching process has been performed to identify the control farmers that most closely resemble the One Acre Fund farmers in the sample.
names(d)[names(d)=="gender"] <- "female"
out.list <- c("female", "age", "cows", "goats", "chickens",
"pigs", "sheep", "field_size", "yield", "dap.acre", "can.acre", "npk.acre", "urea.acre", "chemical_fertilizer_5", "compost_fertilizer_5", "lime_fertilizer_5",
"fertilizer_5", "legum_maincrop_5", "legum_intercrop_5", soilVars, "valley", "hilltop", "hillside", "crop.maize", "alt", "own.cows", "own.goats", "own.chickens", "own.pigs", "own.sheep", "clay_soil", "loam_soil", "sandy_soil")
output <- do.call(rbind, lapply(out.list, function(x) {
out <- t.test(d[,x] ~ d[,"oaf"], data=d)
tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
output$pvalue <- p.adjust(output$pvalue, method="fdr")
rownames(output) <- out.list
output <- output[order(output$pvalue),]
output$pvalue <- ifelse(output[, 3] < 0.001, "< 0.001", round(output[, 3], 3))
colnames(output) <- c("1AF Client","Control", "p-value")
print(kable(output))
| 1AF Client | Control | p-value | |
|---|---|---|---|
| yield | 57.479 | 49.790 | < 0.001 |
| chickens | 11.247 | 7.953 | < 0.001 |
| own.cows | 0.760 | 0.655 | < 0.001 |
| cows | 2.111 | 1.656 | < 0.001 |
| legum_intercrop_5 | 2.582 | 2.995 | < 0.001 |
| own.chickens | 0.908 | 0.857 | 0.003 |
| can.acre | 43.905 | 50.311 | 0.006 |
| dap.acre | 51.401 | 56.489 | 0.009 |
| B | 0.147 | 0.158 | 0.014 |
| Ca | 1021.661 | 1106.482 | 0.014 |
| age | 46.187 | 44.361 | 0.015 |
| pH | 5.845 | 5.907 | 0.015 |
| own.goats | 0.233 | 0.186 | 0.031 |
| goats | 0.555 | 0.422 | 0.036 |
| own.pigs | 0.073 | 0.048 | 0.056 |
| female | 0.706 | 0.660 | 0.081 |
| EC | 46.941 | 48.092 | 0.082 |
| pigs | 0.199 | 0.109 | 0.106 |
| chemical_fertilizer_5 | 3.552 | 3.376 | 0.106 |
| own.sheep | 0.173 | 0.144 | 0.174 |
| C.E.C | 10.257 | 10.582 | 0.215 |
| Hp | 0.362 | 0.347 | 0.215 |
| sheep | 0.468 | 0.375 | 0.245 |
| K | 140.274 | 144.533 | 0.273 |
| S | 13.122 | 12.916 | 0.273 |
| crop.maize | 0.923 | 0.939 | 0.273 |
| lime_fertilizer_5 | 0.039 | 0.020 | 0.299 |
| Mg | 178.907 | 184.621 | 0.304 |
| legum_maincrop_5 | 0.273 | 0.227 | 0.349 |
| fertilizer_5 | 0.222 | 0.182 | 0.405 |
| Exch.Al | 0.132 | 0.124 | 0.537 |
| field_size | 0.540 | 0.526 | 0.633 |
| Fe | 137.280 | 136.182 | 0.707 |
| Na | 31.446 | 31.611 | 0.707 |
| npk.acre | 33.733 | 27.557 | 0.777 |
| P | 12.205 | 11.763 | 0.777 |
| Mn | 158.096 | 156.909 | 0.811 |
| valley | 0.718 | 0.729 | 0.811 |
| hillside | 0.257 | 0.248 | 0.811 |
| clay_soil | 0.073 | 0.079 | 0.811 |
| PSI | 126.270 | 125.265 | 0.838 |
| loam_soil | 0.731 | 0.725 | 0.908 |
| compost_fertilizer_5 | 1.837 | 1.812 | 0.921 |
| Total.C | 2.170 | 2.163 | 0.921 |
| hilltop | 0.025 | 0.023 | 0.963 |
| urea.acre | 42.139 | 42.028 | 0.992 |
| Cu | 3.273 | 3.276 | 0.992 |
| Zn | 4.757 | 4.758 | 0.992 |
| Total.N | 0.158 | 0.158 | 0.992 |
| alt | 1268.062 | 1268.290 | 0.992 |
| sandy_soil | 0.196 | 0.196 | 0.992 |
Demographic variables:
Agricultural practice variables:
Soil Variables:
t10order <- c("age", "female")
table10vars <- paste(t10order, collapse="|")
ke.table10 <- output[grepl(table10vars, rownames(output)),]
ke.table10 <- ke.table10[order(match(rownames(ke.table10), t10order)),]
write.csv(ke.table10, file=paste(od, "pre match balance table10.csv", sep="/"),
row.names = T)
t11order <- c("field_size", "alt", "hilltop", "hillside", "valley", "crop.maize")
table11vars <- paste(t11order, collapse="|")
ke.table11 <- output[grepl(table11vars, rownames(output)),]
ke.table11 <- ke.table11[order(match(rownames(ke.table11), t11order)),]
write.csv(ke.table11, file=paste(od, "pre match balance table 11.csv", sep = "/"),
row.names=T)
t12order <- c("own.cows", "cows", "own.pigs", "pigs", "own.sheep", "sheep", "own.goats", "goats", "own.chickens", "chickens")
table12vars <- paste(t12order, collapse="|")
ke.table12 <- output[grepl(table12vars, rownames(output)),]
ke.table12 <- ke.table12[order(match(rownames(ke.table12), t12order)),]
write.csv(ke.table12, file=paste(od, "pre match balance table 12.csv", sep = "/"),
row.names=T)
#write table
write.csv(output, file=paste(od, "ke baseline balance.csv", sep="/"), row.names=T)
I remove intercrop elements from out.list so that the code runs properly. I have to remove all fertilizer comparisons. CAN fails in Suneka. The others fail across districts due to insuffient observations.
toMatch <- c("npk", "urea", "g_intercrop", "can")
distList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]
dist.output <- do.call(rbind, lapply(split(d, d$DistrictName), function(x) {
tab <- do.call(rbind, lapply(distList, function(y) {
out <- t.test(x[,y] ~ x[,"oaf"], data=x)
tab <- data.frame(out[[5]][[2]], out[[5]][[1]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
return(data.frame(district = unique(x$DistrictName), tab))
}))
rownames(dist.output) <- NULL
dist.output$variable <- rep(distList,length(unique(d$DistrictName)))
# order variables
dist.output <- dist.output[, c(1, 5, 2:4)]
dist.output$pvalue <- p.adjust(dist.output$pvalue, method="fdr")
dist.output <- dist.output[order(dist.output$pvalue),]
dist.output$pvalue <- ifelse(dist.output$pvalue < 0.001, "< 0.001", round(dist.output$pvalue,3))
colnames(dist.output) <- c("District", "Varible", "1AF Client","Control", "p-value")
print(kable(dist.output))
| District | Varible | 1AF Client | Control | p-value | |
|---|---|---|---|---|---|
| 211 | Kakamega (South) | EC | 38.026 | 44.894 | < 0.001 |
| 272 | Kakamega B (North) | S | 11.420 | 10.092 | < 0.001 |
| 496 | Rachuonyo | legum_intercrop_5 | 1.202 | 2.478 | < 0.001 |
| 221 | Kakamega (South) | Na | 30.142 | 32.164 | < 0.001 |
| 225 | Kakamega (South) | Zn | 4.423 | 5.126 | < 0.001 |
| 214 | Kakamega (South) | K | 104.804 | 131.847 | 0.006 |
| 209 | Kakamega (South) | C.E.C | 8.146 | 9.899 | 0.009 |
| 259 | Kakamega B (North) | EC | 46.863 | 54.240 | 0.009 |
| 656 | Webuye | S | 12.120 | 10.849 | 0.01 |
| 569 | Rongo | own.cows | 0.821 | 0.551 | 0.016 |
| 215 | Kakamega (South) | Mg | 136.679 | 168.831 | 0.018 |
| 224 | Kakamega (South) | S | 16.017 | 14.313 | 0.018 |
| 547 | Rongo | EC | 61.226 | 54.433 | 0.019 |
| 57 | Butere | yield | 60.086 | 48.496 | 0.022 |
| 217 | Kakamega (South) | pH | 5.714 | 5.945 | 0.022 |
| 266 | Kakamega B (North) | B | 0.152 | 0.198 | 0.022 |
| 58 | Butere | dap.acre | 48.163 | 65.737 | 0.022 |
| 219 | Kakamega (South) | Ca | 777.513 | 1002.114 | 0.026 |
| 201 | Kakamega (South) | yield | 60.187 | 47.404 | 0.031 |
| 297 | Kisii | yield | 51.022 | 35.080 | 0.031 |
| 273 | Kakamega B (North) | Zn | 4.207 | 4.785 | 0.033 |
| 218 | Kakamega (South) | B | 0.106 | 0.146 | 0.039 |
| 608 | Suneka | S | 12.862 | 14.286 | 0.041 |
| 99 | Chwele | cows | 2.300 | 1.051 | 0.06 |
| 513 | Rachuonyo | Zn | 5.316 | 4.501 | 0.063 |
| 595 | Suneka | EC | 54.096 | 47.497 | 0.063 |
| 91 | Butere | own.chickens | 0.931 | 0.798 | 0.067 |
| 137 | Chwele | own.cows | 0.800 | 0.487 | 0.079 |
| 208 | Kakamega (South) | legum_intercrop_5 | 3.511 | 4.247 | 0.087 |
| 107 | Chwele | chemical_fertilizer_5 | 4.100 | 2.974 | 0.091 |
| 265 | Kakamega B (North) | pH | 5.829 | 6.055 | 0.092 |
| 381 | Lugari | own.sheep | 0.364 | 0.174 | 0.092 |
| 531 | Rongo | cows | 2.372 | 1.487 | 0.092 |
| 257 | Kakamega B (North) | C.E.C | 9.470 | 11.305 | 0.103 |
| 488 | Rachuonyo | field_size | 0.497 | 0.396 | 0.115 |
| 592 | Suneka | legum_intercrop_5 | 3.324 | 4.343 | 0.117 |
| 270 | Kakamega B (North) | P | 10.114 | 14.049 | 0.135 |
| 598 | Suneka | K | 186.138 | 156.538 | 0.135 |
| 256 | Kakamega B (North) | legum_intercrop_5 | 2.805 | 3.583 | 0.135 |
| 533 | Rongo | chickens | 9.897 | 5.551 | 0.135 |
| 593 | Suneka | C.E.C | 12.912 | 11.025 | 0.135 |
| 39 | Busia | crop.maize | 0.767 | 0.956 | 0.167 |
| 269 | Kakamega B (North) | Na | 28.847 | 30.348 | 0.167 |
| 341 | Lugari | chickens | 13.341 | 8.767 | 0.167 |
| 504 | Rachuonyo | Mn | 207.654 | 183.951 | 0.167 |
| 82 | Butere | Total.C | 1.882 | 2.030 | 0.169 |
| 262 | Kakamega B (North) | K | 124.058 | 144.309 | 0.173 |
| 87 | Butere | crop.maize | 0.948 | 1.000 | 0.173 |
| 153 | Gem | yield | 53.281 | 45.119 | 0.173 |
| 249 | Kakamega B (North) | yield | 69.341 | 59.321 | 0.173 |
| 298 | Kisii | dap.acre | 45.537 | 63.035 | 0.173 |
| 343 | Lugari | sheep | 1.091 | 0.442 | 0.173 |
| 560 | Rongo | S | 11.638 | 12.507 | 0.173 |
| 584 | Suneka | field_size | 0.445 | 0.305 | 0.173 |
| 586 | Suneka | dap.acre | 53.032 | 72.151 | 0.173 |
| 263 | Kakamega B (North) | Mg | 150.277 | 178.955 | 0.181 |
| 654 | Webuye | P | 6.752 | 10.124 | 0.182 |
| 355 | Lugari | EC | 46.502 | 50.408 | 0.184 |
| 151 | Gem | sheep | 0.761 | 0.333 | 0.2 |
| 210 | Kakamega (South) | Cu | 3.401 | 3.705 | 0.2 |
| 243 | Kakamega B (North) | cows | 2.354 | 1.702 | 0.2 |
| 602 | Suneka | B | 0.178 | 0.149 | 0.2 |
| 611 | Suneka | Total.N | 0.191 | 0.175 | 0.2 |
| 222 | Kakamega (South) | P | 4.821 | 6.378 | 0.204 |
| 233 | Kakamega (South) | own.cows | 0.824 | 0.674 | 0.204 |
| 362 | Lugari | B | 0.169 | 0.200 | 0.208 |
| 643 | Webuye | EC | 42.309 | 47.568 | 0.211 |
| 603 | Suneka | Ca | 1197.599 | 988.904 | 0.232 |
| 267 | Kakamega B (North) | Ca | 952.727 | 1186.483 | 0.235 |
| 443 | Migori | chemical_fertilizer_5 | 2.826 | 2.106 | 0.237 |
| 245 | Kakamega B (North) | chickens | 10.683 | 7.702 | 0.238 |
| 446 | Migori | fertilizer_5 | 0.081 | 0.271 | 0.245 |
| 653 | Webuye | Na | 27.217 | 29.037 | 0.249 |
| 189 | Gem | own.sheep | 0.246 | 0.141 | 0.251 |
| 532 | Rongo | goats | 1.026 | 0.551 | 0.251 |
| 5 | Busia | chickens | 17.093 | 8.178 | 0.254 |
| 339 | Lugari | cows | 2.045 | 1.291 | 0.254 |
| 188 | Gem | own.pigs | 0.052 | 0.007 | 0.261 |
| 599 | Suneka | Mg | 230.982 | 196.650 | 0.261 |
| 482 | Rachuonyo | age | 48.381 | 44.611 | 0.266 |
| 98 | Chwele | age | 46.625 | 39.410 | 0.27 |
| 9 | Busia | yield | 58.244 | 46.023 | 0.305 |
| 489 | Rachuonyo | yield | 52.155 | 45.178 | 0.305 |
| 570 | Rongo | own.goats | 0.397 | 0.244 | 0.308 |
| 53 | Butere | chickens | 10.879 | 7.941 | 0.309 |
| 90 | Butere | own.goats | 0.207 | 0.109 | 0.309 |
| 650 | Webuye | B | 0.158 | 0.192 | 0.323 |
| 380 | Lugari | own.pigs | 0.045 | 0.000 | 0.329 |
| 587 | Suneka | chemical_fertilizer_5 | 4.118 | 4.771 | 0.329 |
| 399 | Matete | legum_maincrop_5 | 1.283 | 0.681 | 0.331 |
| 241 | Kakamega B (North) | female | 0.720 | 0.571 | 0.332 |
| 158 | Gem | fertilizer_5 | 0.493 | 0.222 | 0.335 |
| 610 | Suneka | Total.C | 2.642 | 2.425 | 0.335 |
| 360 | Lugari | Mn | 119.181 | 108.006 | 0.361 |
| 150 | Gem | pigs | 0.150 | 0.007 | 0.362 |
| 59 | Butere | chemical_fertilizer_5 | 3.313 | 2.765 | 0.368 |
| 197 | Kakamega (South) | chickens | 8.890 | 6.820 | 0.368 |
| 558 | Rongo | P | 35.577 | 24.612 | 0.375 |
| 342 | Lugari | pigs | 0.068 | 0.000 | 0.377 |
| 149 | Gem | chickens | 11.739 | 8.400 | 0.38 |
| 236 | Kakamega (South) | own.pigs | 0.231 | 0.124 | 0.387 |
| 561 | Rongo | Zn | 6.361 | 5.807 | 0.387 |
| 83 | Butere | Total.N | 0.133 | 0.140 | 0.4 |
| 89 | Butere | own.cows | 0.741 | 0.630 | 0.414 |
| 377 | Lugari | own.cows | 0.693 | 0.558 | 0.414 |
| 284 | Kakamega B (North) | own.pigs | 0.110 | 0.036 | 0.419 |
| 11 | Busia | chemical_fertilizer_5 | 3.143 | 2.333 | 0.425 |
| 293 | Kisii | chickens | 9.106 | 6.020 | 0.449 |
| 2 | Busia | age | 44.465 | 39.222 | 0.461 |
| 183 | Gem | crop.maize | 0.910 | 0.963 | 0.461 |
| 231 | Kakamega (South) | crop.maize | 1.000 | 0.966 | 0.488 |
| 121 | Chwele | pH | 6.040 | 6.306 | 0.49 |
| 247 | Kakamega B (North) | sheep | 0.195 | 0.048 | 0.502 |
| 35 | Busia | Total.N | 0.174 | 0.160 | 0.503 |
| 213 | Kakamega (South) | Hp | 0.497 | 0.432 | 0.503 |
| 246 | Kakamega B (North) | pigs | 0.244 | 0.048 | 0.503 |
| 434 | Migori | age | 47.512 | 43.529 | 0.503 |
| 52 | Butere | goats | 0.431 | 0.235 | 0.507 |
| 441 | Migori | yield | 54.430 | 47.659 | 0.517 |
| 509 | Rachuonyo | Na | 35.768 | 34.687 | 0.517 |
| 139 | Chwele | own.chickens | 0.875 | 0.974 | 0.521 |
| 483 | Rachuonyo | cows | 2.155 | 1.689 | 0.522 |
| 34 | Busia | Total.C | 2.162 | 2.012 | 0.577 |
| 155 | Gem | chemical_fertilizer_5 | 3.254 | 3.622 | 0.577 |
| 437 | Migori | chickens | 12.163 | 9.153 | 0.577 |
| 261 | Kakamega B (North) | Hp | 0.311 | 0.267 | 0.578 |
| 285 | Kakamega B (North) | own.sheep | 0.098 | 0.036 | 0.578 |
| 370 | Lugari | Total.C | 2.288 | 2.164 | 0.58 |
| 123 | Chwele | Ca | 851.837 | 1296.267 | 0.58 |
| 216 | Kakamega (South) | Mn | 179.496 | 187.604 | 0.58 |
| 385 | Matete | female | 0.804 | 0.660 | 0.58 |
| 400 | Matete | legum_intercrop_5 | 3.022 | 3.553 | 0.58 |
| 77 | Butere | Na | 28.870 | 29.641 | 0.588 |
| 484 | Rachuonyo | goats | 0.905 | 0.567 | 0.588 |
| 448 | Migori | legum_intercrop_5 | 0.721 | 1.106 | 0.592 |
| 43 | Busia | own.chickens | 0.930 | 0.822 | 0.597 |
| 252 | Kakamega B (North) | compost_fertilizer_5 | 1.866 | 1.386 | 0.597 |
| 291 | Kisii | cows | 1.851 | 1.400 | 0.609 |
| 368 | Lugari | S | 11.077 | 10.692 | 0.61 |
| 492 | Rachuonyo | compost_fertilizer_5 | 0.952 | 1.356 | 0.61 |
| 498 | Rachuonyo | Cu | 4.410 | 4.093 | 0.61 |
| 1 | Busia | female | 0.558 | 0.711 | 0.63 |
| 78 | Butere | P | 11.216 | 13.972 | 0.63 |
| 350 | Lugari | fertilizer_5 | 0.276 | 0.094 | 0.63 |
| 605 | Suneka | Na | 34.168 | 32.524 | 0.63 |
| 666 | Webuye | own.goats | 0.143 | 0.048 | 0.63 |
| 145 | Gem | female | 0.761 | 0.681 | 0.642 |
| 475 | Migori | own.chickens | 0.953 | 0.894 | 0.642 |
| 628 | Webuye | goats | 0.262 | 0.071 | 0.642 |
| 426 | Matete | own.goats | 0.217 | 0.106 | 0.652 |
| 85 | Butere | hilltop | 0.000 | 0.017 | 0.654 |
| 115 | Chwele | EC | 45.541 | 49.370 | 0.654 |
| 179 | Gem | Total.N | 0.146 | 0.152 | 0.654 |
| 194 | Kakamega (South) | age | 48.769 | 51.955 | 0.654 |
| 258 | Kakamega B (North) | Cu | 2.469 | 2.682 | 0.654 |
| 510 | Rachuonyo | P | 10.026 | 7.528 | 0.654 |
| 635 | Webuye | chemical_fertilizer_5 | 4.262 | 3.762 | 0.654 |
| 649 | Webuye | pH | 5.823 | 5.987 | 0.654 |
| 160 | Gem | legum_intercrop_5 | 3.216 | 3.548 | 0.655 |
| 163 | Gem | EC | 39.355 | 38.185 | 0.655 |
| 655 | Webuye | PSI | 125.782 | 113.467 | 0.668 |
| 352 | Lugari | legum_intercrop_5 | 3.885 | 4.224 | 0.674 |
| 75 | Butere | Ca | 672.203 | 733.903 | 0.674 |
| 307 | Kisii | EC | 51.686 | 53.946 | 0.674 |
| 379 | Lugari | own.chickens | 0.898 | 0.826 | 0.674 |
| 281 | Kakamega B (North) | own.cows | 0.866 | 0.786 | 0.681 |
| 323 | Kisii | Total.N | 0.222 | 0.229 | 0.681 |
| 435 | Migori | cows | 2.709 | 2.141 | 0.681 |
| 508 | Rachuonyo | Fe | 135.521 | 128.348 | 0.681 |
| 185 | Gem | own.cows | 0.672 | 0.593 | 0.69 |
| 388 | Matete | goats | 0.543 | 0.255 | 0.696 |
| 113 | Chwele | C.E.C | 7.433 | 9.089 | 0.7 |
| 173 | Gem | Na | 29.773 | 29.227 | 0.7 |
| 391 | Matete | sheep | 0.087 | 0.319 | 0.7 |
| 521 | Rachuonyo | own.cows | 0.798 | 0.711 | 0.7 |
| 646 | Webuye | K | 117.889 | 132.467 | 0.7 |
| 651 | Webuye | Ca | 875.015 | 1030.262 | 0.7 |
| 369 | Lugari | Zn | 3.924 | 3.713 | 0.707 |
| 73 | Butere | pH | 5.530 | 5.604 | 0.711 |
| 253 | Kakamega B (North) | lime_fertilizer_5 | 0.085 | 0.000 | 0.711 |
| 503 | Rachuonyo | Mg | 265.564 | 246.873 | 0.711 |
| 516 | Rachuonyo | valley | 0.560 | 0.656 | 0.711 |
| 601 | Suneka | pH | 5.772 | 5.659 | 0.711 |
| 632 | Webuye | field_size | 0.545 | 0.653 | 0.711 |
| 198 | Kakamega (South) | pigs | 0.769 | 0.337 | 0.732 |
| 493 | Rachuonyo | lime_fertilizer_5 | 0.107 | 0.033 | 0.732 |
| 16 | Busia | legum_intercrop_5 | 2.279 | 2.800 | 0.734 |
| 60 | Butere | compost_fertilizer_5 | 2.276 | 1.933 | 0.734 |
| 127 | Chwele | PSI | 58.827 | 51.199 | 0.734 |
| 458 | Migori | B | 0.188 | 0.174 | 0.734 |
| 464 | Migori | S | 12.367 | 12.757 | 0.734 |
| 609 | Suneka | Zn | 5.966 | 5.538 | 0.734 |
| 481 | Rachuonyo | female | 0.679 | 0.589 | 0.748 |
| 398 | Matete | fertilizer_5 | 0.239 | 0.532 | 0.753 |
| 70 | Butere | K | 98.236 | 104.079 | 0.761 |
| 71 | Butere | Mg | 123.018 | 131.436 | 0.761 |
| 554 | Rongo | B | 0.164 | 0.149 | 0.761 |
| 331 | Kisii | own.chickens | 0.872 | 0.780 | 0.765 |
| 428 | Matete | own.pigs | 0.109 | 0.043 | 0.765 |
| 27 | Busia | Ca | 969.547 | 1141.030 | 0.768 |
| 195 | Kakamega (South) | cows | 1.967 | 1.685 | 0.768 |
| 320 | Kisii | S | 14.119 | 13.761 | 0.768 |
| 383 | Lugari | loam_soil | 0.977 | 0.942 | 0.768 |
| 403 | Matete | EC | 43.147 | 45.389 | 0.768 |
| 147 | Gem | cows | 1.970 | 1.659 | 0.771 |
| 235 | Kakamega (South) | own.chickens | 0.901 | 0.843 | 0.771 |
| 128 | Chwele | S | 9.807 | 9.423 | 0.781 |
| 200 | Kakamega (South) | field_size | 0.544 | 0.485 | 0.781 |
| 42 | Busia | own.goats | 0.256 | 0.156 | 0.782 |
| 178 | Gem | Total.C | 1.811 | 1.868 | 0.784 |
| 28 | Busia | Fe | 130.328 | 122.565 | 0.787 |
| 33 | Busia | Zn | 5.128 | 4.779 | 0.787 |
| 37 | Busia | hilltop | 0.000 | 0.022 | 0.787 |
| 51 | Butere | cows | 2.017 | 1.748 | 0.787 |
| 61 | Butere | lime_fertilizer_5 | 0.017 | 0.000 | 0.787 |
| 63 | Butere | legum_maincrop_5 | 0.181 | 0.118 | 0.787 |
| 86 | Butere | hillside | 0.009 | 0.000 | 0.787 |
| 100 | Chwele | goats | 0.275 | 0.487 | 0.787 |
| 108 | Chwele | compost_fertilizer_5 | 1.950 | 1.436 | 0.787 |
| 117 | Chwele | Hp | 0.243 | 0.206 | 0.787 |
| 133 | Chwele | hilltop | 0.025 | 0.000 | 0.787 |
| 142 | Chwele | clay_soil | 0.025 | 0.000 | 0.787 |
| 146 | Gem | age | 47.642 | 45.556 | 0.787 |
| 193 | Kakamega (South) | female | 0.736 | 0.663 | 0.787 |
| 205 | Kakamega (South) | lime_fertilizer_5 | 0.022 | 0.090 | 0.787 |
| 226 | Kakamega (South) | Total.C | 2.035 | 2.106 | 0.787 |
| 242 | Kakamega B (North) | age | 45.805 | 43.917 | 0.787 |
| 268 | Kakamega B (North) | Fe | 124.110 | 120.553 | 0.787 |
| 275 | Kakamega B (North) | Total.N | 0.157 | 0.151 | 0.787 |
| 277 | Kakamega B (North) | hilltop | 0.012 | 0.000 | 0.787 |
| 282 | Kakamega B (North) | own.goats | 0.280 | 0.214 | 0.787 |
| 302 | Kisii | fertilizer_5 | 0.106 | 0.240 | 0.787 |
| 311 | Kisii | Mg | 275.137 | 290.799 | 0.787 |
| 314 | Kisii | B | 0.210 | 0.225 | 0.787 |
| 338 | Lugari | age | 46.148 | 43.942 | 0.787 |
| 346 | Lugari | dap.acre | 48.967 | 44.422 | 0.787 |
| 349 | Lugari | lime_fertilizer_5 | 0.023 | 0.000 | 0.787 |
| 358 | Lugari | K | 131.120 | 139.581 | 0.787 |
| 364 | Lugari | Fe | 123.110 | 118.683 | 0.787 |
| 382 | Lugari | clay_soil | 0.011 | 0.035 | 0.787 |
| 393 | Matete | yield | 62.239 | 56.717 | 0.787 |
| 397 | Matete | lime_fertilizer_5 | 0.000 | 0.021 | 0.787 |
| 419 | Matete | Total.N | 0.145 | 0.137 | 0.787 |
| 424 | Matete | alt | 1546.540 | 1596.905 | 0.787 |
| 429 | Matete | own.sheep | 0.065 | 0.128 | 0.787 |
| 433 | Migori | female | 0.581 | 0.506 | 0.787 |
| 444 | Migori | compost_fertilizer_5 | 1.686 | 2.024 | 0.787 |
| 445 | Migori | lime_fertilizer_5 | 0.023 | 0.000 | 0.787 |
| 456 | Migori | Mn | 182.830 | 176.837 | 0.787 |
| 460 | Migori | Fe | 169.676 | 174.534 | 0.787 |
| 485 | Rachuonyo | chickens | 9.000 | 7.622 | 0.787 |
| 495 | Rachuonyo | legum_maincrop_5 | 0.119 | 0.067 | 0.787 |
| 497 | Rachuonyo | C.E.C | 13.759 | 13.063 | 0.787 |
| 517 | Rachuonyo | hilltop | 0.036 | 0.011 | 0.787 |
| 538 | Rongo | dap.acre | 46.444 | 53.115 | 0.787 |
| 557 | Rongo | Na | 36.941 | 35.865 | 0.787 |
| 562 | Rongo | Total.C | 2.304 | 2.221 | 0.787 |
| 581 | Suneka | chickens | 12.529 | 5.829 | 0.787 |
| 583 | Suneka | sheep | 0.000 | 0.029 | 0.787 |
| 589 | Suneka | lime_fertilizer_5 | 0.059 | 0.000 | 0.787 |
| 613 | Suneka | hilltop | 0.029 | 0.000 | 0.787 |
| 621 | Suneka | own.sheep | 0.000 | 0.029 | 0.787 |
| 625 | Webuye | female | 0.738 | 0.833 | 0.787 |
| 630 | Webuye | pigs | 0.000 | 0.024 | 0.787 |
| 633 | Webuye | yield | 68.143 | 62.905 | 0.787 |
| 638 | Webuye | fertilizer_5 | 0.119 | 0.000 | 0.787 |
| 640 | Webuye | legum_intercrop_5 | 3.167 | 3.619 | 0.787 |
| 647 | Webuye | Mg | 145.766 | 162.641 | 0.787 |
| 663 | Webuye | crop.maize | 1.000 | 0.976 | 0.787 |
| 668 | Webuye | own.pigs | 0.000 | 0.024 | 0.787 |
| 576 | Rongo | sandy_soil | 0.564 | 0.641 | 0.791 |
| 641 | Webuye | C.E.C | 9.007 | 9.813 | 0.793 |
| 518 | Rachuonyo | hillside | 0.405 | 0.333 | 0.793 |
| 305 | Kisii | C.E.C | 13.942 | 14.492 | 0.804 |
| 477 | Migori | own.sheep | 0.209 | 0.153 | 0.804 |
| 657 | Webuye | Zn | 3.855 | 4.111 | 0.804 |
| 659 | Webuye | Total.N | 0.161 | 0.154 | 0.804 |
| 313 | Kisii | pH | 5.764 | 5.842 | 0.805 |
| 454 | Migori | K | 215.241 | 207.129 | 0.81 |
| 40 | Busia | alt | 1198.566 | 1225.322 | 0.811 |
| 65 | Butere | C.E.C | 7.720 | 8.072 | 0.811 |
| 118 | Chwele | K | 103.957 | 113.435 | 0.811 |
| 168 | Gem | Mn | 166.868 | 171.951 | 0.811 |
| 296 | Kisii | field_size | 0.343 | 0.309 | 0.811 |
| 344 | Lugari | field_size | 0.688 | 0.753 | 0.811 |
| 442 | Migori | dap.acre | 54.533 | 61.589 | 0.811 |
| 449 | Migori | C.E.C | 15.104 | 14.547 | 0.811 |
| 473 | Migori | own.cows | 0.791 | 0.729 | 0.811 |
| 512 | Rachuonyo | S | 12.912 | 13.322 | 0.811 |
| 550 | Rongo | K | 182.358 | 171.577 | 0.811 |
| 4 | Busia | goats | 0.535 | 0.333 | 0.812 |
| 18 | Busia | Cu | 4.016 | 3.715 | 0.812 |
| 19 | Busia | EC | 47.956 | 45.685 | 0.812 |
| 41 | Busia | own.cows | 0.651 | 0.556 | 0.812 |
| 92 | Butere | own.pigs | 0.034 | 0.059 | 0.812 |
| 110 | Chwele | fertilizer_5 | 0.225 | 0.077 | 0.812 |
| 190 | Gem | clay_soil | 0.179 | 0.222 | 0.812 |
| 203 | Kakamega (South) | chemical_fertilizer_5 | 4.527 | 4.348 | 0.812 |
| 274 | Kakamega B (North) | Total.C | 2.248 | 2.182 | 0.812 |
| 309 | Kisii | Hp | 0.478 | 0.413 | 0.812 |
| 536 | Rongo | field_size | 0.484 | 0.452 | 0.812 |
| 543 | Rongo | legum_maincrop_5 | 0.359 | 0.462 | 0.812 |
| 597 | Suneka | Hp | 0.428 | 0.478 | 0.812 |
| 606 | Suneka | P | 9.322 | 7.719 | 0.812 |
| 578 | Suneka | age | 46.235 | 43.114 | 0.815 |
| 466 | Migori | Total.C | 2.637 | 2.698 | 0.828 |
| 545 | Rongo | C.E.C | 12.273 | 11.640 | 0.828 |
| 644 | Webuye | Exch.Al | 0.088 | 0.102 | 0.828 |
| 25 | Busia | pH | 5.906 | 5.997 | 0.83 |
| 431 | Matete | loam_soil | 0.891 | 0.830 | 0.83 |
| 432 | Matete | sandy_soil | 0.109 | 0.170 | 0.83 |
| 627 | Webuye | cows | 2.119 | 1.786 | 0.83 |
| 254 | Kakamega B (North) | fertilizer_5 | 0.160 | 0.071 | 0.84 |
| 406 | Matete | K | 114.570 | 122.255 | 0.846 |
| 634 | Webuye | dap.acre | 53.223 | 47.324 | 0.854 |
| 76 | Butere | Fe | 141.381 | 144.766 | 0.855 |
| 132 | Chwele | valley | 0.900 | 0.949 | 0.855 |
| 366 | Lugari | P | 11.787 | 10.411 | 0.855 |
| 645 | Webuye | Hp | 0.291 | 0.269 | 0.855 |
| 212 | Kakamega (South) | Exch.Al | 0.176 | 0.151 | 0.858 |
| 631 | Webuye | sheep | 0.095 | 0.190 | 0.859 |
| 15 | Busia | legum_maincrop_5 | 0.023 | 0.067 | 0.86 |
| 122 | Chwele | B | 0.134 | 0.150 | 0.86 |
| 202 | Kakamega (South) | dap.acre | 51.854 | 55.610 | 0.86 |
| 417 | Matete | Zn | 4.093 | 4.256 | 0.86 |
| 491 | Rachuonyo | chemical_fertilizer_5 | 2.702 | 2.889 | 0.86 |
| 522 | Rachuonyo | own.goats | 0.321 | 0.267 | 0.86 |
| 523 | Rachuonyo | own.chickens | 0.845 | 0.800 | 0.86 |
| 539 | Rongo | chemical_fertilizer_5 | 2.949 | 3.179 | 0.86 |
| 573 | Rongo | own.sheep | 0.179 | 0.231 | 0.86 |
| 618 | Suneka | own.goats | 0.147 | 0.086 | 0.86 |
| 68 | Butere | Exch.Al | 0.228 | 0.196 | 0.862 |
| 119 | Chwele | Mg | 107.356 | 117.334 | 0.863 |
| 223 | Kakamega (South) | PSI | 145.493 | 140.141 | 0.863 |
| 271 | Kakamega B (North) | PSI | 125.869 | 121.524 | 0.863 |
| 401 | Matete | C.E.C | 8.499 | 9.037 | 0.863 |
| 418 | Matete | Total.C | 2.101 | 2.020 | 0.863 |
| 537 | Rongo | yield | 48.329 | 45.039 | 0.863 |
| 563 | Rongo | Total.N | 0.151 | 0.147 | 0.863 |
| 658 | Webuye | Total.C | 2.106 | 2.015 | 0.863 |
| 471 | Migori | crop.maize | 0.907 | 0.871 | 0.864 |
| 20 | Busia | Exch.Al | 0.124 | 0.104 | 0.865 |
| 526 | Rachuonyo | clay_soil | 0.024 | 0.044 | 0.865 |
| 152 | Gem | field_size | 0.474 | 0.447 | 0.867 |
| 494 | Rachuonyo | fertilizer_5 | 0.417 | 0.311 | 0.867 |
| 500 | Rachuonyo | Exch.Al | 0.095 | 0.106 | 0.867 |
| 49 | Butere | female | 0.733 | 0.689 | 0.867 |
| 32 | Busia | S | 14.130 | 13.689 | 0.873 |
| 124 | Chwele | Fe | 135.330 | 130.891 | 0.874 |
| 327 | Kisii | crop.maize | 0.851 | 0.900 | 0.876 |
| 574 | Rongo | clay_soil | 0.064 | 0.038 | 0.876 |
| 21 | Busia | Hp | 0.330 | 0.307 | 0.878 |
| 74 | Butere | B | 0.102 | 0.107 | 0.878 |
| 101 | Chwele | chickens | 10.025 | 8.385 | 0.878 |
| 181 | Gem | hilltop | 0.022 | 0.037 | 0.878 |
| 182 | Gem | hillside | 0.358 | 0.319 | 0.878 |
| 187 | Gem | own.chickens | 0.940 | 0.919 | 0.878 |
| 191 | Gem | loam_soil | 0.799 | 0.763 | 0.878 |
| 207 | Kakamega (South) | legum_maincrop_5 | 0.356 | 0.247 | 0.878 |
| 276 | Kakamega B (North) | valley | 0.829 | 0.869 | 0.878 |
| 286 | Kakamega B (North) | clay_soil | 0.073 | 0.048 | 0.878 |
| 292 | Kisii | goats | 0.702 | 0.540 | 0.878 |
| 413 | Matete | Na | 29.027 | 29.532 | 0.878 |
| 427 | Matete | own.chickens | 0.935 | 0.894 | 0.878 |
| 501 | Rachuonyo | Hp | 0.290 | 0.307 | 0.878 |
| 553 | Rongo | pH | 5.899 | 5.856 | 0.878 |
| 568 | Rongo | alt | 1283.056 | 1232.582 | 0.878 |
| 571 | Rongo | own.chickens | 0.885 | 0.846 | 0.878 |
| 177 | Gem | Zn | 4.140 | 4.232 | 0.879 |
| 120 | Chwele | Mn | 92.440 | 96.506 | 0.879 |
| 450 | Migori | Cu | 4.040 | 3.945 | 0.879 |
| 234 | Kakamega (South) | own.goats | 0.143 | 0.180 | 0.88 |
| 575 | Rongo | loam_soil | 0.372 | 0.321 | 0.88 |
| 255 | Kakamega B (North) | legum_maincrop_5 | 0.622 | 0.488 | 0.88 |
| 312 | Kisii | Mn | 221.671 | 215.785 | 0.88 |
| 594 | Suneka | Cu | 4.218 | 4.054 | 0.88 |
| 407 | Matete | Mg | 133.007 | 142.882 | 0.89 |
| 176 | Gem | S | 15.437 | 15.655 | 0.891 |
| 244 | Kakamega B (North) | goats | 0.500 | 0.405 | 0.891 |
| 310 | Kisii | K | 212.583 | 218.497 | 0.891 |
| 67 | Butere | EC | 40.878 | 41.683 | 0.892 |
| 143 | Chwele | loam_soil | 0.650 | 0.718 | 0.892 |
| 237 | Kakamega (South) | own.sheep | 0.055 | 0.079 | 0.892 |
| 330 | Kisii | own.goats | 0.319 | 0.260 | 0.892 |
| 502 | Rachuonyo | K | 191.338 | 185.835 | 0.892 |
| 455 | Migori | Mg | 291.552 | 283.010 | 0.893 |
| 559 | Rongo | PSI | 92.332 | 95.860 | 0.893 |
| 664 | Webuye | alt | 1540.877 | 1568.134 | 0.893 |
| 251 | Kakamega B (North) | chemical_fertilizer_5 | 4.244 | 4.096 | 0.893 |
| 69 | Butere | Hp | 0.445 | 0.426 | 0.9 |
| 389 | Matete | chickens | 13.674 | 11.894 | 0.9 |
| 411 | Matete | Ca | 809.328 | 858.827 | 0.9 |
| 227 | Kakamega (South) | Total.N | 0.156 | 0.160 | 0.9 |
| 96 | Butere | sandy_soil | 0.190 | 0.160 | 0.904 |
| 373 | Lugari | hilltop | 0.011 | 0.023 | 0.906 |
| 384 | Lugari | sandy_soil | 0.011 | 0.023 | 0.906 |
| 404 | Matete | Exch.Al | 0.119 | 0.109 | 0.908 |
| 125 | Chwele | Na | 30.915 | 31.522 | 0.91 |
| 264 | Kakamega B (North) | Mn | 130.041 | 133.470 | 0.913 |
| 577 | Suneka | female | 0.529 | 0.600 | 0.913 |
| 184 | Gem | alt | 1193.128 | 1166.167 | 0.915 |
| 365 | Lugari | Na | 28.192 | 27.857 | 0.915 |
| 415 | Matete | PSI | 111.212 | 105.877 | 0.915 |
| 436 | Migori | goats | 1.070 | 0.906 | 0.915 |
| 530 | Rongo | age | 42.590 | 41.397 | 0.915 |
| 585 | Suneka | yield | 50.529 | 47.086 | 0.915 |
| 590 | Suneka | fertilizer_5 | 0.147 | 0.086 | 0.915 |
| 363 | Lugari | Ca | 946.378 | 988.467 | 0.916 |
| 84 | Butere | valley | 0.991 | 0.983 | 0.917 |
| 112 | Chwele | legum_intercrop_5 | 3.175 | 2.923 | 0.917 |
| 410 | Matete | B | 0.144 | 0.151 | 0.917 |
| 451 | Migori | EC | 57.572 | 56.792 | 0.917 |
| 461 | Migori | Na | 37.542 | 37.136 | 0.917 |
| 157 | Gem | lime_fertilizer_5 | 0.045 | 0.022 | 0.923 |
| 6 | Busia | pigs | 1.047 | 0.844 | 0.926 |
| 55 | Butere | sheep | 0.371 | 0.487 | 0.926 |
| 81 | Butere | Zn | 4.290 | 4.364 | 0.926 |
| 114 | Chwele | Cu | 1.812 | 1.875 | 0.926 |
| 290 | Kisii | age | 45.809 | 44.420 | 0.926 |
| 416 | Matete | S | 11.442 | 11.164 | 0.926 |
| 520 | Rachuonyo | alt | 1359.050 | 1311.449 | 0.926 |
| 540 | Rongo | compost_fertilizer_5 | 0.692 | 0.821 | 0.926 |
| 62 | Butere | fertilizer_5 | 0.095 | 0.134 | 0.927 |
| 66 | Butere | Cu | 2.609 | 2.530 | 0.927 |
| 131 | Chwele | Total.N | 0.103 | 0.099 | 0.927 |
| 278 | Kakamega B (North) | hillside | 0.159 | 0.131 | 0.927 |
| 287 | Kakamega B (North) | loam_soil | 0.841 | 0.869 | 0.927 |
| 345 | Lugari | yield | 58.636 | 60.395 | 0.927 |
| 394 | Matete | dap.acre | 48.412 | 52.225 | 0.927 |
| 414 | Matete | P | 12.972 | 11.862 | 0.927 |
| 472 | Migori | alt | 1275.836 | 1310.417 | 0.927 |
| 505 | Rachuonyo | pH | 6.182 | 6.144 | 0.927 |
| 588 | Suneka | compost_fertilizer_5 | 0.824 | 1.029 | 0.927 |
| 135 | Chwele | crop.maize | 0.950 | 0.923 | 0.938 |
| 141 | Chwele | own.sheep | 0.050 | 0.077 | 0.938 |
| 308 | Kisii | Exch.Al | 0.161 | 0.139 | 0.938 |
| 624 | Suneka | sandy_soil | 0.088 | 0.057 | 0.938 |
| 162 | Gem | Cu | 3.353 | 3.407 | 0.938 |
| 295 | Kisii | sheep | 0.043 | 0.020 | 0.938 |
| 395 | Matete | chemical_fertilizer_5 | 4.239 | 4.106 | 0.938 |
| 467 | Migori | Total.N | 0.177 | 0.179 | 0.944 |
| 192 | Gem | sandy_soil | 0.022 | 0.015 | 0.949 |
| 402 | Matete | Cu | 2.336 | 2.404 | 0.949 |
| 629 | Webuye | chickens | 11.524 | 9.548 | 0.949 |
| 447 | Migori | legum_maincrop_5 | 0.140 | 0.188 | 0.95 |
| 648 | Webuye | Mn | 131.760 | 128.340 | 0.95 |
| 372 | Lugari | valley | 0.807 | 0.779 | 0.952 |
| 30 | Busia | P | 8.264 | 7.408 | 0.955 |
| 474 | Migori | own.goats | 0.326 | 0.294 | 0.955 |
| 134 | Chwele | hillside | 0.075 | 0.051 | 0.958 |
| 144 | Chwele | sandy_soil | 0.325 | 0.282 | 0.958 |
| 180 | Gem | valley | 0.619 | 0.644 | 0.958 |
| 280 | Kakamega B (North) | alt | 1012.737 | 1054.354 | 0.958 |
| 322 | Kisii | Total.C | 2.845 | 2.877 | 0.958 |
| 324 | Kisii | valley | 0.277 | 0.240 | 0.958 |
| 326 | Kisii | hillside | 0.723 | 0.760 | 0.958 |
| 337 | Lugari | female | 0.773 | 0.744 | 0.958 |
| 353 | Lugari | C.E.C | 9.406 | 9.648 | 0.958 |
| 390 | Matete | pigs | 0.283 | 0.191 | 0.958 |
| 392 | Matete | field_size | 0.609 | 0.652 | 0.958 |
| 421 | Matete | hilltop | 0.087 | 0.064 | 0.958 |
| 452 | Migori | Exch.Al | 0.067 | 0.071 | 0.958 |
| 457 | Migori | pH | 6.186 | 6.212 | 0.958 |
| 519 | Rachuonyo | crop.maize | 0.929 | 0.944 | 0.958 |
| 548 | Rongo | Exch.Al | 0.114 | 0.108 | 0.958 |
| 636 | Webuye | compost_fertilizer_5 | 1.810 | 2.000 | 0.958 |
| 31 | Busia | PSI | 148.837 | 144.400 | 0.959 |
| 130 | Chwele | Total.C | 1.534 | 1.496 | 0.959 |
| 328 | Kisii | alt | 1530.459 | 1571.339 | 0.959 |
| 340 | Lugari | goats | 0.091 | 0.128 | 0.959 |
| 511 | Rachuonyo | PSI | 141.108 | 145.216 | 0.959 |
| 542 | Rongo | fertilizer_5 | 0.208 | 0.169 | 0.959 |
| 596 | Suneka | Exch.Al | 0.166 | 0.183 | 0.959 |
| 623 | Suneka | loam_soil | 0.853 | 0.886 | 0.959 |
| 669 | Webuye | own.sheep | 0.071 | 0.095 | 0.959 |
| 348 | Lugari | compost_fertilizer_5 | 1.598 | 1.482 | 0.961 |
| 316 | Kisii | Fe | 129.242 | 127.516 | 0.961 |
| 318 | Kisii | P | 2.809 | 2.614 | 0.963 |
| 580 | Suneka | goats | 0.235 | 0.343 | 0.963 |
| 165 | Gem | Hp | 0.410 | 0.400 | 0.967 |
| 23 | Busia | Mg | 176.760 | 183.696 | 0.967 |
| 175 | Gem | PSI | 128.096 | 130.272 | 0.967 |
| 260 | Kakamega B (North) | Exch.Al | 0.107 | 0.100 | 0.967 |
| 299 | Kisii | chemical_fertilizer_5 | 2.298 | 2.120 | 0.967 |
| 300 | Kisii | compost_fertilizer_5 | 1.255 | 1.120 | 0.967 |
| 412 | Matete | Fe | 126.753 | 128.220 | 0.967 |
| 423 | Matete | crop.maize | 0.935 | 0.915 | 0.967 |
| 544 | Rongo | legum_intercrop_5 | 1.538 | 1.449 | 0.967 |
| 552 | Rongo | Mn | 150.975 | 154.477 | 0.967 |
| 617 | Suneka | own.cows | 0.706 | 0.743 | 0.978 |
| 8 | Busia | field_size | 0.703 | 0.733 | 0.981 |
| 10 | Busia | dap.acre | 37.975 | 40.215 | 0.981 |
| 14 | Busia | fertilizer_5 | 0.302 | 0.356 | 0.981 |
| 17 | Busia | C.E.C | 10.515 | 10.782 | 0.981 |
| 29 | Busia | Na | 29.657 | 29.921 | 0.981 |
| 36 | Busia | valley | 0.907 | 0.889 | 0.981 |
| 50 | Butere | age | 44.595 | 44.008 | 0.981 |
| 54 | Butere | pigs | 0.086 | 0.101 | 0.981 |
| 80 | Butere | S | 14.108 | 13.954 | 0.981 |
| 88 | Butere | alt | 1273.988 | 1263.968 | 0.981 |
| 94 | Butere | clay_soil | 0.190 | 0.202 | 0.981 |
| 95 | Butere | loam_soil | 0.621 | 0.639 | 0.981 |
| 103 | Chwele | sheep | 0.125 | 0.103 | 0.981 |
| 105 | Chwele | yield | 61.175 | 59.231 | 0.981 |
| 106 | Chwele | dap.acre | 45.398 | 47.258 | 0.981 |
| 109 | Chwele | lime_fertilizer_5 | 0.175 | 0.128 | 0.981 |
| 116 | Chwele | Exch.Al | 0.076 | 0.070 | 0.981 |
| 164 | Gem | Exch.Al | 0.150 | 0.145 | 0.981 |
| 166 | Gem | K | 104.789 | 103.416 | 0.981 |
| 169 | Gem | pH | 5.802 | 5.791 | 0.981 |
| 171 | Gem | Ca | 785.984 | 795.618 | 0.981 |
| 186 | Gem | own.goats | 0.224 | 0.237 | 0.981 |
| 196 | Kakamega (South) | goats | 0.330 | 0.303 | 0.981 |
| 199 | Kakamega (South) | sheep | 0.154 | 0.191 | 0.981 |
| 204 | Kakamega (South) | compost_fertilizer_5 | 3.088 | 3.157 | 0.981 |
| 206 | Kakamega (South) | fertilizer_5 | 0.044 | 0.034 | 0.981 |
| 228 | Kakamega (South) | valley | 0.648 | 0.663 | 0.981 |
| 230 | Kakamega (South) | hillside | 0.308 | 0.292 | 0.981 |
| 232 | Kakamega (South) | alt | 1411.321 | 1399.184 | 0.981 |
| 248 | Kakamega B (North) | field_size | 0.671 | 0.690 | 0.981 |
| 250 | Kakamega B (North) | dap.acre | 47.820 | 48.786 | 0.981 |
| 283 | Kakamega B (North) | own.chickens | 0.939 | 0.929 | 0.981 |
| 289 | Kisii | female | 0.745 | 0.720 | 0.981 |
| 306 | Kisii | Cu | 5.101 | 5.146 | 0.981 |
| 315 | Kisii | Ca | 1375.107 | 1398.179 | 0.981 |
| 317 | Kisii | Na | 33.998 | 33.832 | 0.981 |
| 321 | Kisii | Zn | 5.301 | 5.373 | 0.981 |
| 329 | Kisii | own.cows | 0.745 | 0.720 | 0.981 |
| 351 | Lugari | legum_maincrop_5 | 0.057 | 0.071 | 0.981 |
| 361 | Lugari | pH | 5.787 | 5.774 | 0.981 |
| 367 | Lugari | PSI | 128.062 | 126.435 | 0.981 |
| 371 | Lugari | Total.N | 0.160 | 0.159 | 0.981 |
| 374 | Lugari | hillside | 0.182 | 0.198 | 0.981 |
| 396 | Matete | compost_fertilizer_5 | 1.783 | 1.915 | 0.981 |
| 405 | Matete | Hp | 0.335 | 0.327 | 0.981 |
| 408 | Matete | Mn | 126.124 | 127.453 | 0.981 |
| 409 | Matete | pH | 5.741 | 5.765 | 0.981 |
| 440 | Migori | field_size | 0.471 | 0.458 | 0.981 |
| 453 | Migori | Hp | 0.257 | 0.262 | 0.981 |
| 459 | Migori | Ca | 1692.366 | 1723.831 | 0.981 |
| 462 | Migori | P | 19.144 | 19.842 | 0.981 |
| 465 | Migori | Zn | 6.479 | 6.544 | 0.981 |
| 469 | Migori | hilltop | 0.081 | 0.071 | 0.981 |
| 470 | Migori | hillside | 0.442 | 0.459 | 0.981 |
| 506 | Rachuonyo | B | 0.179 | 0.182 | 0.981 |
| 514 | Rachuonyo | Total.C | 2.290 | 2.273 | 0.981 |
| 525 | Rachuonyo | own.sheep | 0.440 | 0.422 | 0.981 |
| 527 | Rachuonyo | loam_soil | 0.774 | 0.756 | 0.981 |
| 546 | Rongo | Cu | 3.522 | 3.474 | 0.981 |
| 551 | Rongo | Mg | 217.827 | 213.987 | 0.981 |
| 564 | Rongo | valley | 0.833 | 0.821 | 0.981 |
| 566 | Rongo | hillside | 0.167 | 0.179 | 0.981 |
| 607 | Suneka | PSI | 144.626 | 142.453 | 0.981 |
| 614 | Suneka | hillside | 0.206 | 0.229 | 0.981 |
| 616 | Suneka | alt | 1368.148 | 1338.315 | 0.981 |
| 619 | Suneka | own.chickens | 0.794 | 0.771 | 0.981 |
| 665 | Webuye | own.cows | 0.762 | 0.738 | 0.981 |
| 671 | Webuye | loam_soil | 0.357 | 0.333 | 0.981 |
| 672 | Webuye | sandy_soil | 0.643 | 0.667 | 0.981 |
| 44 | Busia | own.pigs | 0.442 | 0.422 | 0.985 |
| 129 | Chwele | Zn | 3.531 | 3.571 | 0.985 |
| 347 | Lugari | chemical_fertilizer_5 | 4.425 | 4.388 | 0.985 |
| 480 | Migori | sandy_soil | 0.151 | 0.141 | 0.985 |
| 535 | Rongo | sheep | 0.513 | 0.551 | 0.985 |
| 567 | Rongo | crop.maize | 0.756 | 0.769 | 0.985 |
| 507 | Rachuonyo | Ca | 1487.987 | 1507.098 | 0.985 |
| 304 | Kisii | legum_intercrop_5 | 1.957 | 1.880 | 0.994 |
| 3 | Busia | cows | 1.512 | 1.578 | 0.994 |
| 356 | Lugari | Exch.Al | 0.096 | 0.099 | 0.995 |
| 79 | Butere | PSI | 113.234 | 112.391 | 0.996 |
| 167 | Gem | Mg | 138.339 | 139.362 | 1 |
| 136 | Chwele | alt | 955.358 | 937.070 | 1 |
| 376 | Lugari | alt | 1077.900 | 1095.638 | 1 |
| 499 | Rachuonyo | EC | 52.448 | 52.215 | 1 |
| 642 | Webuye | Cu | 2.726 | 2.699 | 1 |
| 7 | Busia | sheep | 0.093 | 0.089 | 1 |
| 12 | Busia | compost_fertilizer_5 | 1.372 | 1.378 | 1 |
| 22 | Busia | K | 130.795 | 132.082 | 1 |
| 24 | Busia | Mn | 174.610 | 175.526 | 1 |
| 26 | Busia | B | 0.159 | 0.160 | 1 |
| 38 | Busia | hillside | 0.093 | 0.089 | 1 |
| 45 | Busia | own.sheep | 0.070 | 0.067 | 1 |
| 46 | Busia | clay_soil | 0.023 | 0.022 | 1 |
| 47 | Busia | loam_soil | 0.884 | 0.889 | 1 |
| 48 | Busia | sandy_soil | 0.093 | 0.089 | 1 |
| 56 | Butere | field_size | 0.536 | 0.530 | 1 |
| 64 | Butere | legum_intercrop_5 | 2.629 | 2.622 | 1 |
| 72 | Butere | Mn | 135.295 | 136.073 | 1 |
| 93 | Butere | own.sheep | 0.138 | 0.134 | 1 |
| 97 | Chwele | female | 0.725 | 0.718 | 1 |
| 102 | Chwele | pigs | 0.425 | 0.436 | 1 |
| 104 | Chwele | field_size | 0.575 | 0.574 | 1 |
| 111 | Chwele | legum_maincrop_5 | 0.050 | 0.051 | 1 |
| 126 | Chwele | P | 25.547 | 25.175 | 1 |
| 138 | Chwele | own.goats | 0.225 | 0.231 | 1 |
| 140 | Chwele | own.pigs | 0.125 | 0.128 | 1 |
| 148 | Gem | goats | 0.545 | 0.541 | 1 |
| 154 | Gem | dap.acre | 53.324 | 53.624 | 1 |
| 156 | Gem | compost_fertilizer_5 | 2.664 | 2.652 | 1 |
| 159 | Gem | legum_maincrop_5 | 0.261 | 0.259 | 1 |
| 161 | Gem | C.E.C | 8.234 | 8.204 | 1 |
| 170 | Gem | B | 0.110 | 0.110 | 1 |
| 172 | Gem | Fe | 130.034 | 130.058 | 1 |
| 174 | Gem | P | 5.694 | 5.604 | 1 |
| 220 | Kakamega (South) | Fe | 131.662 | 131.618 | 1 |
| 229 | Kakamega (South) | hilltop | 0.044 | 0.045 | 1 |
| 239 | Kakamega (South) | loam_soil | 0.648 | 0.652 | 1 |
| 240 | Kakamega (South) | sandy_soil | 0.352 | 0.348 | 1 |
| 279 | Kakamega B (North) | crop.maize | 0.976 | 0.976 | 1 |
| 288 | Kakamega B (North) | sandy_soil | 0.085 | 0.083 | 1 |
| 303 | Kisii | legum_maincrop_5 | 0.319 | 0.300 | 1 |
| 319 | Kisii | PSI | 210.692 | 211.678 | 1 |
| 333 | Kisii | own.sheep | 0.021 | 0.020 | 1 |
| 335 | Kisii | loam_soil | 0.851 | 0.860 | 1 |
| 336 | Kisii | sandy_soil | 0.149 | 0.140 | 1 |
| 354 | Lugari | Cu | 2.296 | 2.306 | 1 |
| 357 | Lugari | Hp | 0.307 | 0.309 | 1 |
| 359 | Lugari | Mg | 159.265 | 160.031 | 1 |
| 375 | Lugari | crop.maize | 0.989 | 0.988 | 1 |
| 378 | Lugari | own.goats | 0.045 | 0.047 | 1 |
| 386 | Matete | age | 43.870 | 44.128 | 1 |
| 387 | Matete | cows | 2.217 | 2.191 | 1 |
| 420 | Matete | valley | 0.500 | 0.511 | 1 |
| 422 | Matete | hillside | 0.413 | 0.426 | 1 |
| 425 | Matete | own.cows | 0.761 | 0.766 | 1 |
| 439 | Migori | sheep | 0.593 | 0.588 | 1 |
| 463 | Migori | PSI | 110.488 | 110.625 | 1 |
| 468 | Migori | valley | 0.477 | 0.471 | 1 |
| 478 | Migori | clay_soil | 0.116 | 0.118 | 1 |
| 479 | Migori | loam_soil | 0.733 | 0.741 | 1 |
| 487 | Rachuonyo | sheep | 1.095 | 1.067 | 1 |
| 490 | Rachuonyo | dap.acre | 73.343 | 73.779 | 1 |
| 515 | Rachuonyo | Total.N | 0.173 | 0.173 | 1 |
| 528 | Rachuonyo | sandy_soil | 0.202 | 0.200 | 1 |
| 529 | Rongo | female | 0.667 | 0.667 | 1 |
| 549 | Rongo | Hp | 0.348 | 0.347 | 1 |
| 555 | Rongo | Ca | 1177.134 | 1182.208 | 1 |
| 556 | Rongo | Fe | 169.246 | 168.722 | 1 |
| 579 | Suneka | cows | 1.471 | 1.429 | 1 |
| 591 | Suneka | legum_maincrop_5 | 0.088 | 0.086 | 1 |
| 600 | Suneka | Mn | 188.015 | 188.691 | 1 |
| 604 | Suneka | Fe | 146.640 | 146.812 | 1 |
| 612 | Suneka | valley | 0.765 | 0.771 | 1 |
| 615 | Suneka | crop.maize | 0.941 | 0.943 | 1 |
| 622 | Suneka | clay_soil | 0.059 | 0.057 | 1 |
| 626 | Webuye | age | 45.024 | 45.167 | 1 |
| 639 | Webuye | legum_maincrop_5 | 0.024 | 0.024 | 1 |
| 652 | Webuye | Fe | 116.392 | 116.915 | 1 |
| 660 | Webuye | valley | 0.952 | 0.952 | 1 |
| 662 | Webuye | hillside | 0.048 | 0.048 | 1 |
| 667 | Webuye | own.chickens | 0.905 | 0.905 | 1 |
| 13 | Busia | lime_fertilizer_5 | 0.000 | 0.000 | NA |
| 238 | Kakamega (South) | clay_soil | 0.000 | 0.000 | NA |
| 294 | Kisii | pigs | 0.000 | 0.000 | NA |
| 301 | Kisii | lime_fertilizer_5 | 0.000 | 0.000 | NA |
| 325 | Kisii | hilltop | 0.000 | 0.000 | NA |
| 332 | Kisii | own.pigs | 0.000 | 0.000 | NA |
| 334 | Kisii | clay_soil | 0.000 | 0.000 | NA |
| 430 | Matete | clay_soil | 0.000 | 0.000 | NA |
| 438 | Migori | pigs | 0.000 | 0.000 | NA |
| 476 | Migori | own.pigs | 0.000 | 0.000 | NA |
| 486 | Rachuonyo | pigs | 0.000 | 0.000 | NA |
| 524 | Rachuonyo | own.pigs | 0.000 | 0.000 | NA |
| 534 | Rongo | pigs | 0.000 | 0.000 | NA |
| 541 | Rongo | lime_fertilizer_5 | 0.000 | 0.000 | NA |
| 565 | Rongo | hilltop | 0.000 | 0.000 | NA |
| 572 | Rongo | own.pigs | 0.000 | 0.000 | NA |
| 582 | Suneka | pigs | 0.000 | 0.000 | NA |
| 620 | Suneka | own.pigs | 0.000 | 0.000 | NA |
| 637 | Webuye | lime_fertilizer_5 | 0.000 | 0.000 | NA |
| 661 | Webuye | hilltop | 0.000 | 0.000 | NA |
| 670 | Webuye | clay_soil | 0.000 | 0.000 | NA |
Demographic variables:
Agricultural practice variables:
Soil Variables:
write.csv(dist.output, file=paste(od, "ke district balance.csv", sep="/"), row.names=T)
suppressMessages(library(stargazer))
d[,grep("_5", names(d))] <- sapply(d[,grep("_5", names(d))], function(x){
ifelse(x==-99, NA, x)
})
apply(d[,grep("_5", names(d))],2, table)
$chemical_fertilizer_5
0 1 2 3 4 5 296 149 179 207 93 1106
$compost_fertilizer_5
0 1 2 3 4 5 988 176 174 116 55 523
$lime_fertilizer_5
0 1 2 3 5 2007 7 12 1 5
$fertilizer_5
0 1 2 3 4 5 1841 90 39 23 7 29
$legum_maincrop_5
0 1 2 3 4 5 1771 142 56 24 14 25
$legum_intercrop_5
0 1 2 3 4 5 507 174 244 189 150 768
Plot the variables as they are. Should we consider a log transform as we used for the Rwanda variables?
for(i in names(d)[grep("_5", names(d))]) {
print(
ggplot(d, aes(x=d[,i])) + geom_histogram(binwidth=1) +
labs(x = paste("Past five seasons of ", i, sep = ""))
)
}
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing non-finite values (stat_bin).
## Warning: Removed 3 rows containing non-finite values (stat_bin).
d$logFert <- log(d$chemical_fertilizer_5+1)
d$logCompost <- log(d$compost_fertilizer_5+1)
d$logLime <- log(d$lime_fertilizer_5+1)
d$logLegmain <- log(d$legum_maincrop_5+1)
#d$logLegint <- log(d$legum_intercrop_5+1)
logVars <- paste(names(d[grep("log", names(d))]), collapse=" + ")
cor(d[,grep("log", names(d))], use = "complete.obs")
## logFert logCompost logLime logLegmain
## logFert 1.00000000 0.12629624 0.03840900 0.08136505
## logCompost 0.12629624 1.00000000 0.03135643 0.05436087
## logLime 0.03840900 0.03135643 1.00000000 -0.02479574
## logLegmain 0.08136505 0.05436087 -0.02479574 1.00000000
Look at farmers by duration of tenure farming with 1AF We want to understand, at least with an initial naive baseline sense, what is the cumulative effect of Tubura practices on soil health outcomes?
We will look only at current 1AF farmers and compare first year farmers to farmers with more experience with Tubura.
oafOnly <- d[which(d$oaf==1 & d$seasons_oaf>=1),]
for(i in 1:length(soilVars)){
print(
ggplot(oafOnly, aes(x=as.factor(seasons_oaf), y=oafOnly[,soilVars[i]])) +
geom_boxplot() +
labs(x="OAF Tenure", y=soilVars[i], title = paste("KE baseline soil by tenure - ", soilVars[i], sep = ""))
)
}
tenureSum <- aggregate(oafOnly[,out.list], by=list(oafOnly$seasons_oaf), function(x){
round(mean(x, na.rm=T),2)
})
tenureSum <- as.data.frame(t(tenureSum))
colnames(tenureSum) <- c(paste(seq(1,8,1), " seas.", sep=""))
print(kable(tenureSum))
| 1 seas. | 2 seas. | 3 seas. | 4 seas. | 5 seas. | 6 seas. | 7 seas. | 8 seas. | |
|---|---|---|---|---|---|---|---|---|
| Group.1 | 1.00 | 2.00 | 3.00 | 4.00 | 5.00 | 6.00 | 7.00 | 8.00 |
| female | 0.71 | 0.69 | 0.77 | 0.67 | 0.68 | 0.79 | 0.60 | 0.67 |
| age | 43.13 | 45.55 | 47.37 | 51.85 | 51.76 | 51.34 | 49.20 | 58.67 |
| cows | 1.82 | 1.96 | 2.17 | 2.87 | 2.70 | 2.76 | 1.60 | 4.83 |
| goats | 0.58 | 0.45 | 0.42 | 0.86 | 0.71 | 0.34 | 0.20 | 0.33 |
| chickens | 11.06 | 10.73 | 10.26 | 12.06 | 15.95 | 11.41 | 8.00 | 21.00 |
| pigs | 0.17 | 0.21 | 0.14 | 0.23 | 0.21 | 0.69 | 0.20 | 0.83 |
| sheep | 0.46 | 0.52 | 0.43 | 0.44 | 0.54 | 0.17 | 0.40 | 0.17 |
| field_size | 0.50 | 0.54 | 0.55 | 0.65 | 0.53 | 0.65 | 0.45 | 0.62 |
| yield | 48.79 | 62.83 | 63.74 | 65.09 | 63.08 | 59.62 | 54.80 | 51.50 |
| dap.acre | 56.65 | 50.06 | 50.75 | 45.26 | 44.01 | 42.35 | 62.93 | 53.78 |
| can.acre | 49.77 | 42.67 | 43.78 | 40.03 | 41.40 | 35.16 | 57.00 | 40.75 |
| npk.acre | 21.01 | 68.00 | 20.00 | NaN | NaN | NaN | NaN | NaN |
| urea.acre | 42.07 | 52.00 | 26.67 | 24.00 | NaN | 4.00 | NaN | NaN |
| chemical_fertilizer_5 | 3.04 | 3.48 | 3.82 | 4.35 | 4.37 | 4.41 | 4.20 | 5.00 |
| compost_fertilizer_5 | 1.61 | 1.75 | 1.73 | 2.12 | 2.86 | 2.62 | 1.20 | 1.83 |
| lime_fertilizer_5 | 0.02 | 0.06 | 0.02 | 0.09 | 0.08 | 0.00 | 0.00 | 0.00 |
| fertilizer_5 | 0.22 | 0.25 | 0.21 | 0.28 | 0.16 | 0.24 | 0.00 | 0.00 |
| legum_maincrop_5 | 0.20 | 0.17 | 0.28 | 0.55 | 0.57 | 0.59 | 0.00 | 0.00 |
| legum_intercrop_5 | 2.54 | 2.51 | 2.53 | 2.69 | 2.89 | 3.28 | 2.40 | 2.17 |
| C.E.C | 10.30 | 10.31 | 10.58 | 10.14 | 9.66 | 9.54 | 6.61 | 9.95 |
| Cu | 3.21 | 3.30 | 3.24 | 3.23 | 3.65 | 3.54 | 1.90 | 3.23 |
| EC | 47.98 | 46.63 | 47.44 | 46.19 | 44.33 | 41.66 | 43.20 | 43.46 |
| Exch.Al | 0.13 | 0.14 | 0.13 | 0.11 | 0.18 | 0.09 | 0.14 | 0.10 |
| Hp | 0.37 | 0.35 | 0.35 | 0.34 | 0.47 | 0.33 | 0.37 | 0.34 |
| K | 142.63 | 141.07 | 143.31 | 136.61 | 131.29 | 121.91 | 90.35 | 130.13 |
| Mg | 179.27 | 182.43 | 182.20 | 174.70 | 168.92 | 168.02 | 92.00 | 159.78 |
| Mn | 153.35 | 161.79 | 157.42 | 156.02 | 169.54 | 179.08 | 103.36 | 152.35 |
| pH | 5.82 | 5.87 | 5.86 | 5.87 | 5.70 | 5.95 | 5.69 | 5.82 |
| B | 0.15 | 0.15 | 0.16 | 0.15 | 0.14 | 0.12 | 0.10 | 0.14 |
| Ca | 1009.45 | 1043.20 | 1054.48 | 1045.94 | 905.38 | 1020.28 | 618.14 | 913.43 |
| Fe | 139.24 | 137.46 | 132.84 | 135.61 | 132.93 | 144.18 | 138.20 | 132.92 |
| Na | 31.79 | 31.54 | 31.10 | 30.99 | 30.57 | 31.21 | 29.75 | 29.81 |
| P | 14.49 | 12.50 | 10.15 | 10.50 | 6.40 | 6.52 | 24.05 | 9.50 |
| PSI | 122.41 | 125.61 | 130.29 | 126.75 | 148.03 | 123.62 | 85.38 | 129.65 |
| S | 12.80 | 13.04 | 12.88 | 13.39 | 14.68 | 15.45 | 10.93 | 13.20 |
| Zn | 4.79 | 4.80 | 4.67 | 4.68 | 4.62 | 4.91 | 3.61 | 4.35 |
| Total.C | 2.18 | 2.15 | 2.21 | 2.15 | 2.16 | 2.07 | 1.66 | 2.10 |
| Total.N | 0.16 | 0.16 | 0.16 | 0.16 | 0.17 | 0.15 | 0.12 | 0.16 |
| valley | 0.72 | 0.72 | 0.74 | 0.72 | 0.68 | 0.72 | 0.80 | 0.67 |
| hilltop | 0.03 | 0.02 | 0.02 | 0.03 | 0.03 | 0.03 | 0.00 | 0.00 |
| hillside | 0.25 | 0.26 | 0.24 | 0.24 | 0.29 | 0.24 | 0.20 | 0.33 |
| crop.maize | 0.87 | 0.93 | 0.97 | 0.95 | 0.98 | 0.97 | 1.00 | 1.00 |
| alt | 1265.77 | 1303.87 | 1158.44 | 1222.10 | 1353.65 | 1324.20 | 1499.11 | 1464.94 |
| own.cows | 0.68 | 0.75 | 0.81 | 0.88 | 0.86 | 0.90 | 0.80 | 1.00 |
| own.goats | 0.22 | 0.22 | 0.20 | 0.33 | 0.32 | 0.28 | 0.20 | 0.33 |
| own.chickens | 0.89 | 0.93 | 0.90 | 0.92 | 0.92 | 0.86 | 1.00 | 1.00 |
| own.pigs | 0.05 | 0.08 | 0.07 | 0.09 | 0.10 | 0.28 | 0.20 | 0.33 |
| own.sheep | 0.17 | 0.17 | 0.20 | 0.16 | 0.14 | 0.10 | 0.20 | 0.17 |
| clay_soil | 0.09 | 0.05 | 0.07 | 0.08 | 0.08 | 0.10 | 0.00 | 0.17 |
| loam_soil | 0.71 | 0.78 | 0.73 | 0.78 | 0.73 | 0.55 | 0.40 | 0.50 |
| sandy_soil | 0.21 | 0.17 | 0.20 | 0.14 | 0.19 | 0.34 | 0.60 | 0.33 |
oafOnly$tenured <- ifelse(oafOnly$seasons_oaf>=3,0,1)
toMatch <- c("npk", "urea", "g_intercrop")
tenureList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]
tenure <- do.call(rbind, lapply(tenureList, function(x) {
out <- t.test(oafOnly[,x] ~ oafOnly[,"tenured"], data=oafOnly)
tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
return(tab)
}))
# use p.adjust with bonferroni correction
tenure$pvalue <- p.adjust(tenure$pvalue, method="fdr")
rownames(tenure) <- tenureList
tenure <- tenure[order(tenure$pvalue),]
tenure$pvalue <- ifelse(tenure[, 3] < 0.001, "< 0.001", round(tenure[, 3], 3))
colnames(tenure) <- c("Tenured (3+)","New (< 3)", "p-value")
print(kable(tenure))
| Tenured (3+) | New (< 3) | p-value | |
|---|---|---|---|
| chemical_fertilizer_5 | 3.217 | 4.141 | < 0.001 |
| age | 44.094 | 49.980 | < 0.001 |
| yield | 54.379 | 63.318 | < 0.001 |
| own.cows | 0.711 | 0.848 | < 0.001 |
| cows | 1.877 | 2.549 | < 0.001 |
| crop.maize | 0.898 | 0.966 | < 0.001 |
| P | 13.700 | 9.457 | < 0.001 |
| legum_maincrop_5 | 0.189 | 0.425 | 0.002 |
| dap.acre | 53.582 | 47.490 | 0.005 |
| field_size | 0.517 | 0.585 | 0.007 |
| compost_fertilizer_5 | 1.665 | 2.112 | 0.007 |
| S | 12.892 | 13.540 | 0.01 |
| own.pigs | 0.059 | 0.103 | 0.065 |
| can.acre | 45.950 | 41.428 | 0.07 |
| Na | 31.688 | 30.939 | 0.08 |
| PSI | 123.685 | 131.293 | 0.113 |
| EC | 47.440 | 45.912 | 0.142 |
| Fe | 138.535 | 134.661 | 0.194 |
| own.goats | 0.216 | 0.264 | 0.238 |
| K | 142.012 | 136.476 | 0.425 |
| Zn | 4.793 | 4.661 | 0.425 |
| Total.N | 0.157 | 0.160 | 0.425 |
| alt | 1280.940 | 1235.703 | 0.427 |
| legum_intercrop_5 | 2.531 | 2.695 | 0.45 |
| chickens | 10.931 | 12.046 | 0.552 |
| Mn | 156.708 | 160.161 | 0.624 |
| Mg | 180.529 | 174.821 | 0.625 |
| female | 0.698 | 0.724 | 0.658 |
| Cu | 3.242 | 3.315 | 0.711 |
| goats | 0.532 | 0.586 | 0.726 |
| pigs | 0.184 | 0.236 | 0.726 |
| sheep | 0.485 | 0.425 | 0.726 |
| lime_fertilizer_5 | 0.032 | 0.049 | 0.726 |
| Exch.Al | 0.135 | 0.127 | 0.726 |
| loam_soil | 0.738 | 0.718 | 0.726 |
| C.E.C | 10.299 | 10.132 | 0.753 |
| B | 0.148 | 0.145 | 0.795 |
| Hp | 0.360 | 0.368 | 0.807 |
| sandy_soil | 0.191 | 0.204 | 0.807 |
| clay_soil | 0.071 | 0.078 | 0.866 |
| fertilizer_5 | 0.231 | 0.216 | 0.9 |
| Ca | 1022.886 | 1013.531 | 0.9 |
| Total.C | 2.171 | 2.162 | 0.9 |
| valley | 0.718 | 0.724 | 0.9 |
| hillside | 0.258 | 0.250 | 0.9 |
| pH | 5.843 | 5.840 | 0.94 |
| hilltop | 0.025 | 0.026 | 0.94 |
| own.chickens | 0.906 | 0.908 | 0.94 |
| own.sheep | 0.174 | 0.172 | 0.94 |
Demographic variables:
Agricultural practice variables:
Soil Variables:
Keep only soil variables with reliable r2 values for the regressions. Confirm this list with Emmanuel and confirm when we’ll get the C and N estimates
#soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
# "pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn")
soilVars <- c("Mg", "pH", "Ca", "Exch.Al", "C.E.C", "Total.C", "Total.N")
list1 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", logVars, "+ as.factor(SiteName)", sep="")), data=d)
return(mod)
})
Guide to interpreting Linear-Log Models
plm.log <- function(x, range){
beta = round(summary(x)$coefficients[range,1],3)
beta.pval = summary(x)$coefficients[range,4]
beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
beta.pval < 0.05, "**", ifelse(
beta.pval < 0.1, "*", "")))
#beta.pval = round(beta.pval, 3)
outcome = paste(beta, beta.conv, sep = "")
outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
res = data.frame(outcome, stringsAsFactors = F)
return(res)
}
ke.table17 <- do.call(cbind, lapply(list1, function(x){
plm.log(x, 2:5)
}))
colnames(ke.table17) <- soilVars
rownames(ke.table17) <- c(unlist(strsplit(gsub(" \\+ ", " ", logVars), " ")), "constant")
ke.table17 <- ke.table17[,c("pH","Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "C.E.C")]
write.csv(ke.table17, file=paste(od, "ketable17.csv", sep="/"), row.names=T)
# stargazer for table 17
stargazer(list1, type="html",
title = "2016A Kenya Soil Health Baseline - Agronomic Practice Models (log)",
covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost",
"Seasons of Lime", "Seasons of Legume (main)",
"Seasons of Legume (inter)"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for site",
omit=c("SiteName"), out=paste(od, "ke_baseline_agprac.htm",sep="/"))
stargazer(list2, type="html",
title = "2016A Kenya Soil Health Baseline - Naive Tenure Models",
covariate.labels = c("OAF Tenure"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for site",
omit=c("SiteName"), out=paste(od, "ke_baseline_tenure.htm",sep="/"))
list3 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", logVars, "+ seasons_oaf + as.factor(SiteName)", sep="")), data=d)
return(mod)
})
stargazer(list3, type="html",
title = "2016A Kenya Soil Health Baseline - Ag Practice and Tenure",
covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost",
"Seasons of Lime", "Seasons of Legume (main)",
"Seasons of Legume (inter)", "OAF Tenure"),
dep.var.caption = "",
dep.var.labels = "",
column.labels = c(gsub("m3.","", soilVars)),
notes = "Includes FE for site",
omit=c("SiteName"), out=paste(od, "ke_baseline_ag_tenure.htm", sep="/"))
First, convert all quantites to kg/acre and kg/ha. Then check for collinearity, set up and execute models
cor(d[,acreInputs[c(1:2,5)] ], use="complete.obs")
## dap.acre can.acre compost.acre
## dap.acre 1.000000000 0.73288819 -0.007709517
## can.acre 0.732888188 1.00000000 -0.024655908
## compost.acre -0.007709517 -0.02465591 1.000000000
As with the Rwanda baseline soil health study, agricultural input quantity use is highly correlated.
plm.t16 <- function(x, range){
beta = round(summary(x)$coefficients[range,1],3)
beta.pval = round(summary(x)$coefficients[range,4],3)
beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
beta.pval < 0.05, "**", ifelse(
beta.pval < 0.1, "*", "")))
#beta.pval = round(beta.pval, 3)
outcome = paste(beta, " (", beta.pval, ")", sep = "")
outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
res = data.frame(outcome, stringsAsFactors = F)
return(res)
}
previousSeasonfert <- paste("dap.acre", "as.factor(SiteName)", sep=" + ")
list.t18 <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", previousSeasonfert, sep="")), data=d)
return(mod)
})
table18 <- do.call(cbind, lapply(list.t18, function(x){
plm.t16(x, 2)
}))
colnames(table18) <- soilVars
rownames(table18) <- c("DAP (kg/acre)", "constant")
table18 <- table18[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18, file=paste(od, "table18_dap.csv", sep = "/"),
row.names = T)
previousSeasoncompost <- paste("compost.acre", "as.factor(SiteName)", sep=" + ")
list.t18b <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", previousSeasoncompost, sep="")), data=d)
return(mod)
})
table18b <- do.call(cbind, lapply(list.t18b, function(x){
plm.t16(x, 2)
}))
colnames(table18b) <- soilVars
rownames(table18b) <- c("Compost (kg/acre)", "constant")
table18b <- table18b[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18b, file=paste(od, "table18_compost.csv", sep = "/"),
row.names = T)
The compost regression results are suprising. Look at Carbon and compost scatter plot
ggplot(d, aes(x = compost.acre, y=Total.C)) + geom_point() +
scale_x_continuous(limits=c(0,50)) + # remove larger values from the graph.
geom_smooth(method='lm')
## Warning: Removed 668 rows containing non-finite values (stat_smooth).
## Warning: Removed 668 rows containing missing values (geom_point).
plm.t18 <- function(x, range){
beta = round(summary(x)$coefficients[range,1],4)
beta.pval = round(summary(x)$coefficients[range,4],3)
beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
beta.pval < 0.05, "**", ifelse(
beta.pval < 0.1, "*", "")))
#beta.pval = round(beta.pval, 3)
outcome = paste(beta, " (", beta.pval, ")", sep = "")
outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
res = data.frame(outcome, stringsAsFactors = F)
return(res)
}
previousSeasonCan <- paste("can.acre", "as.factor(SiteName)", sep=" + ")
list.t18c <- lapply(soilVars, function(x){
mod <- lm(as.formula(paste("d[,x] ~", previousSeasonCan, sep="")), data=d)
return(mod)
})
table18c <- do.call(cbind, lapply(list.t18c, function(x){
plm.t18(x, 2)
}))
colnames(table18c) <- soilVars
rownames(table18c) <- c("CAN (kg/acre)", "constant")
table18c <- table18c[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18c, file=paste(od, "table18_can.csv", sep = "/"),
row.names = T)
dist.sum <- aggregate(d[,out.list], by=list(d$DistrictName), function(x){
return(c(
paste(
round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")",
" (", round(sd(x, na.rm=T),2), ")", sep = ""
)
))
})
write.csv(dist.sum, file=paste(od, "district covariate summary.csv", sep = "/"))
site.sum <- aggregate(d[,out.list], by=list(d$SiteName), function(x){
return(c(
paste(
round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")",
" (", round(sd(x, na.rm=T),2), ")", sep = ""
)
))
})
write.csv(site.sum, file=paste(od, "site covariate summary.csv", sep = "/"))
largerSite <- table(d$SiteName)>10
largerSite <- rownames(as.matrix(largerSite[largerSite==T]))
site.sum.tr <- aggregate(d[d$SiteName %in% largerSite,out.list], by=list(d[d$SiteName %in% largerSite, "SiteName"]), function(x){
return(c(
paste(
round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")",
" (", round(sd(x, na.rm=T),2), ")", sep = ""
)
))
})
write.csv(site.sum.tr, file=paste(od, "site truncated covariate summary.csv", sep = "/"))
toRemove <- "female"
genderBalance <- out.list[!out.list %in% toRemove]
equal <- do.call(rbind, lapply(genderBalance, function(x){
out <- t.test(d[,x] ~ d[,"female"], data=d)
tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
tab[,1:2] <- round(tab[,1:2],3)
names(tab) <- c(names(out[[5]]), "pvalue")
#tab[,3] <- p.adjust(tab[,3], method="holm")
#tab[,3] <- ifelse(tab[,3] < 0.001, "< 0.001", round(tab[,3],3))
#print(tab)
return(tab)
}))
rownames(equal) <- NULL
# order variables
equal$pvalue <- p.adjust(equal$pvalue, method="fdr")
rownames(equal) <- genderBalance
equal <- equal[order(equal$pvalue),]
equal$pvalue <- ifelse(equal$pvalue < 0.001, "< 0.001", round(equal$pvalue,3))
colnames(equal) <- c("Male Farmers","Female Farmers", "p-value")
write.csv(equal, file=paste(od, "female farmer status.csv", sep = "/"), row.names=T)
We need to do a more rigorous job of accounting for differences between Tubura farmers and identified control farmers. Execute propensity score matching (PSM) to identify control farmers that overlap with Tubura farmers with regard to their likelihood of being a Tubura farmer.
psmVars <- paste(out.list <- c("female", "age", "cows", "goats", "chickens","pigs", "sheep", "SiteName"), collapse=" + ")
psmCheck <- unlist(strsplit(gsub("\\+", "", as.vector(psmVars))," ", fixed=TRUE))
psmCheck <- psmCheck[-which(psmCheck=="")]
naOut <- cbind(unlist(lapply(psmCheck, function(x){sum(is.na(d[,x]))})), psmCheck)
naOut
## psmCheck
## [1,] "0" "female"
## [2,] "0" "age"
## [3,] "0" "cows"
## [4,] "0" "goats"
## [5,] "0" "chickens"
## [6,] "2" "pigs"
## [7,] "0" "sheep"
## [8,] "0" "SiteName"
We have missing values that are preventing the prediction function that preceeds PSM from running properly. Let’s for now just remove yield as a conributing variable.
h <- d[complete.cases(d[,psmCheck]),]
psmVars <- paste(psmCheck, collapse=" + ")
reg <- glm(as.formula(paste("oaf ~", psmVars, sep="")), family= binomial(link="logit"), data=h)
# summarize predicted probabilities
pr <- data.frame(pr_score = predict(reg, type='response'), treat = h$oaf)
# graph
psmGraph <- ggplot() + geom_histogram(data=subset(pr, pr$treat==1), aes(x = pr_score, y=..count.., fill=as.factor(treat)), bins=80, position = "identity") + geom_histogram(data=subset(pr, pr$treat==0), aes(x=pr_score, y=-..count.., fill=as.factor(treat)), bins=80, position = "identity") +
scale_y_continuous(limits=c(-150,150)) +
labs(title ="PSM score overlap", x = "PSM score", y="Farmer count",
fill="1AF/Control")
print(psmGraph)
pdf(file=paste(od, "ke_baseline_psm_overlap.pdf", sep="/"), height=8.5, width=11)
print(psmGraph)
dev.off()
## quartz_off_screen
## 2
d$age2 <- d$age^2
coreVars = c("female", "age", "SiteName", "cows", "goats", "chickens", "pigs", "sheep",
"sandy_soil", "loam_soil")
psmList <- list(
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="Ca"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="Mg"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="pH"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="Total.C"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="Total.N"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="chemical_fertilizer_5"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="Exch.Al"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="compost_fertilizer_5"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="legum_maincrop_5"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="legum_intercrop_5"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="logFert"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="dap.acre"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="can.acre"),
list(tr = "oaf",
psmVars = paste(coreVars,
collapse=" + "),
y="compost.acre")
)
#lime_fertilizer_5 >> so few farmers use it that we get a bad match.
# PSM
set.seed(20161102)
m <- lapply(psmList, function(listInput){
naCheck <- unlist(strsplit(gsub("\\+", "", as.vector(listInput$psmVars))," ", fixed=TRUE))
naCheck <- naCheck[-which(naCheck=="")]
# keep complete cases of outcome variable
k <- d[complete.cases(d[,naCheck]),]
k <- k[complete.cases(k[,listInput$y]),]
# run glm regression:
reg <- glm(as.formula(paste(listInput$tr, "~", listInput$psmVars, sep="")), family= binomial(link="logit"), data=k)
suppressWarnings(
mod <- Match(Y = k[,listInput$y], Tr = k[,listInput$tr], X = reg$fitted.values, ties=FALSE, replace=FALSE, caliper=0.25, estimand = "ATE")
)
matchRes <- MatchBalance(k[,listInput$tr] ~ k[,listInput$y], match.out=mod, nboots=500, data=k, print.level = 0)
#print(listInput$y)
return(list(mod, matchRes))
})
The models can now vary by outcome. Let’s see if we can improve our results.
matchRes <- do.call(rbind, lapply(1:length(m), function(model){
val <- as.data.frame(cbind(
standard.diff=m[[model]][[2]]$AfterMatching[[1]]$sdiff,
var.ratio = m[[model]][[2]]$AfterMatching[[1]]$var.ratio,
sdiff.adj = m[[model]][[2]]$AfterMatching[[1]]$sdiff/100))
return(val)
}))
namesInput <- NULL
for(i in 1:length(psmList)){
namesInput[i] <- psmList[[i]]$y
}
rownames(matchRes) <- namesInput
print(kable(matchRes))
| standard.diff | var.ratio | sdiff.adj | |
|---|---|---|---|
| Ca | -13.6136287 | 0.6789672 | -0.1361363 |
| Mg | -5.0967582 | 1.0804687 | -0.0509676 |
| pH | -15.0434472 | 0.8554127 | -0.1504345 |
| Total.C | 4.4842565 | 1.0177585 | 0.0448426 |
| Total.N | 0.8195454 | 1.0005472 | 0.0081955 |
| chemical_fertilizer_5 | 4.5777306 | 0.9433016 | 0.0457773 |
| Exch.Al | 4.1576889 | 1.1818138 | 0.0415769 |
| compost_fertilizer_5 | -3.8350953 | 0.9332778 | -0.0383510 |
| legum_maincrop_5 | 4.5800790 | 1.2010159 | 0.0458008 |
| legum_intercrop_5 | -24.6327075 | 0.9618697 | -0.2463271 |
| logFert | 6.0512109 | 0.9198516 | 0.0605121 |
| dap.acre | -11.7756456 | 0.6762797 | -0.1177565 |
| can.acre | -30.4057248 | 0.5112521 | -0.3040572 |
| compost.acre | 1.1631865 | 0.9475964 | 0.0116319 |
write.csv(matchRes, file=paste(od, "ke psm performance.csv", sep = "/"), row.names=T)
So few farmers use lime fertilizer that we don’t get a good match. I could make it binary but it’s so overwhelmingly the case that none of the farmers use lime that I’m just going to drop it.
We also don’t get super great matches on DAP and CAN. I’m going to push forward with this but I’m going to push forward with the results. We should flag this.
coefTable <- do.call(rbind, lapply(1:length(m), function(model){
beta = round(m[[model]][[1]]$est.noadj,3)
mean.Tr = round(m[[model]][[2]]$AfterMatching[[1]][[3]], 2)
mean.Co = round(m[[model]][[2]]$AfterMatching[[1]][[4]], 2)
pval = m[[model]][[2]]$AfterMatching[[1]][[10]][[3]] # p.value
#pval = (1 - pnorm(abs(m[[model]][[1]]$est/m[[model]][[1]]$se.standard))) * 2
pval = ifelse(pval < 0.001, "0.001", round(pval, 3))
res = data.frame(beta, mean.Tr, mean.Co, pval, stringsAsFactors = F)
return(res)
}))
row.names(coefTable) <- namesInput
coefTable$pval.adj <- round(p.adjust(coefTable$pval, method="fdr"),3)
print(kable(coefTable))
| beta | mean.Tr | mean.Co | pval | pval.adj | |
|---|---|---|---|---|---|
| Ca | -76.899 | 1005.76 | 1082.66 | 0.001 | 0.004 |
| Mg | -4.711 | 174.93 | 179.64 | 0.143 | 0.286 |
| pH | -0.069 | 5.83 | 5.90 | 0.001 | 0.004 |
| Total.C | 0.025 | 2.16 | 2.13 | 0.201 | 0.291 |
| Total.N | 0.000 | 0.16 | 0.16 | 0.82 | 0.820 |
| chemical_fertilizer_5 | 0.086 | 3.59 | 3.50 | 0.208 | 0.291 |
| Exch.Al | 0.007 | 0.13 | 0.12 | 0.234 | 0.298 |
| compost_fertilizer_5 | -0.081 | 1.87 | 1.95 | 0.28 | 0.327 |
| legum_maincrop_5 | 0.038 | 0.27 | 0.23 | 0.168 | 0.291 |
| legum_intercrop_5 | -0.495 | 2.68 | 3.17 | 0.001 | 0.004 |
| logFert | 0.037 | 1.38 | 1.34 | 0.101 | 0.236 |
| dap.acre | -3.281 | 51.61 | 54.90 | 0.014 | 0.039 |
| can.acre | -6.971 | 43.33 | 50.30 | 0.001 | 0.004 |
| compost.acre | 22.763 | 279.59 | 256.82 | 0.753 | 0.811 |
write.csv(coefTable, file=paste(od, "ke psm coefficients.csv", sep = "/"),
row.names = T)
# sort by the order Eric wants
t7order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")
table7vars <- paste(t7order, collapse = "|")
table7 <- coefTable[grep(table7vars, rownames(coefTable)), ]
table7 <- table7[order(match(rownames(table7), t7order)),]
write.csv(table7, file=paste(od, "table7.csv", sep = "/"), row.names = T)
# 11/17 added lime
t8order <- c("chemical_fertilizer_5", "compost_fertilizer_5", "legum_maincrop_5",
"legum_intercrop_5")
table8vars <- paste(t8order, collapse = "|")
table8 <- coefTable[grep(table8vars, rownames(coefTable)), ]
table8 <- table8[order(match(rownames(table8), t8order)),]
write.csv(table8, file=paste(od, "table8.csv", sep = "/"), row.names = T)
#table 3
t9order <- c("dap.acre", "can.acre", "compost.acre")
table9vars <- paste(t9order, collapse = "|")
table9 <- coefTable[grep(table9vars, rownames(coefTable)), ]
table9 <- table9[order(match(rownames(table9), t9order)),]
write.csv(table9, file=paste(od, "table9.csv", sep = "/"))
This requires us to re-run the PSM matching process, confirm that our models are sound and then output the results ala table 2 again.
I’m comparing season 1 clients to clients with more tenure in the PSM matching.
d$tenure.tiers <- ifelse(d$oaf==1 & d$seasons_oaf==1, 0,
ifelse(d$oaf==1 & d$seasons_oaf>1, 1, NA))
coreVars = c("female", "age", "SiteName", "cows", "goats", "chickens", "pigs", "sheep")
psmList <- list(
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="Ca"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="Mg"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="pH"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="Total.C"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="Total.N"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="chemical_fertilizer_5"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="Exch.Al"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="compost_fertilizer_5"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="legum_maincrop_5"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="legum_intercrop_5"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="logFert"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="dap.acre"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="can.acre"),
list(tr = "tenure.tiers",
psmVars = paste(coreVars,
collapse=" + "),
y="compost.acre")
)
#lime_fertilizer_5 >> so few farmers use it that we get a bad match.
# PSM
set.seed(20161102)
mappend <- lapply(psmList, function(listInput){
naCheck <- unlist(strsplit(gsub("\\+", "", as.vector(listInput$psmVars))," ", fixed=TRUE))
naCheck <- naCheck[-which(naCheck=="")]
# keep complete cases of outcome variable
k <- d[complete.cases(d[,naCheck]),]
k <- k[complete.cases(k[,listInput$y]),]
k <- k[complete.cases(k[,listInput$tr]),]
# run glm regression:
reg <- glm(as.formula(paste(listInput$tr, "~", listInput$psmVars, sep="")), family= binomial(link="logit"), data=k)
suppressWarnings(
mod <- Match(Y = k[,listInput$y], Tr = k[,listInput$tr], X = reg$fitted.values, ties=FALSE, replace=FALSE, caliper=0.25, estimand = "ATE")
)
matchRes <- MatchBalance(k[,listInput$tr] ~ k[,listInput$y], match.out=mod, nboots=500, data=k, print.level = 0)
#print(listInput$y)
return(list(mod, matchRes))
})
The models can now vary by outcome. Let’s see if we can improve our results. (appendix)
matchRes <- do.call(rbind, lapply(1:length(mappend), function(model){
val <- as.data.frame(cbind(
standard.diff=mappend[[model]][[2]]$AfterMatching[[1]]$sdiff,
var.ratio = mappend[[model]][[2]]$AfterMatching[[1]]$var.ratio,
sdiff.adj = mappend[[model]][[2]]$AfterMatching[[1]]$sdiff/100))
return(val)
}))
namesInput <- NULL
for(i in 1:length(psmList)){
namesInput[i] <- psmList[[i]]$y
}
rownames(matchRes) <- namesInput
print(kable(matchRes))
| standard.diff | var.ratio | sdiff.adj | |
|---|---|---|---|
| Ca | 0.9238112 | 1.2218850 | 0.0092381 |
| Mg | -15.5729015 | 0.9676569 | -0.1557290 |
| pH | 4.9974209 | 0.8986447 | 0.0499742 |
| Total.C | -22.4745136 | 1.1666893 | -0.2247451 |
| Total.N | -31.0867873 | 1.0640974 | -0.3108679 |
| chemical_fertilizer_5 | 31.9321294 | 0.7947044 | 0.3193213 |
| Exch.Al | 2.2636258 | 3.1970113 | 0.0226363 |
| compost_fertilizer_5 | 17.1447816 | 1.0188232 | 0.1714478 |
| legum_maincrop_5 | 11.9680100 | 1.2999763 | 0.1196801 |
| legum_intercrop_5 | -14.7087869 | 0.9302600 | -0.1470879 |
| logFert | 28.9243301 | 0.7743529 | 0.2892433 |
| dap.acre | -39.2628790 | 0.5683184 | -0.3926288 |
| can.acre | -70.1928722 | 0.2701376 | -0.7019287 |
| compost.acre | -5.4679030 | 0.1045200 | -0.0546790 |
write.csv(matchRes, file=paste(od, "ke psm performance-appendix.csv", sep = "/"), row.names=T)
So few farmers use lime fertilizer that we don’t get a good match. I could make it binary but it’s so overwhelmingly the case that none of the farmers use lime that I’m just going to drop it.
We also don’t get super great matches on DAP and CAN. I’m going to push forward with this but I’m going to push forward with the results. We should flag this.
coefTable.append <- do.call(rbind, lapply(1:length(mappend), function(model){
beta = round(mappend[[model]][[1]]$est.noadj,3)
mean.Tr = round(mappend[[model]][[2]]$AfterMatching[[1]][[3]], 2)
mean.Co = round(mappend[[model]][[2]]$AfterMatching[[1]][[4]], 2)
pval = mappend[[model]][[2]]$AfterMatching[[1]][[10]][[3]] # p.value
#pval = (1 - pnorm(abs(m[[model]][[1]]$est/m[[model]][[1]]$se.standard))) * 2
pval = ifelse(pval < 0.001, "0.001", round(pval, 3))
res = data.frame(beta, mean.Tr, mean.Co, pval, stringsAsFactors = F)
return(res)
}))
row.names(coefTable.append) <- namesInput
coefTable.append$pval.adj <- round(p.adjust(coefTable.append$pval, method="fdr"),3)
print(kable(coefTable.append))
| beta | mean.Tr | mean.Co | pval | pval.adj | |
|---|---|---|---|---|---|
| Ca | 4.608 | 906.47 | 901.87 | 0.9 | 0.900 |
| Mg | -12.895 | 156.10 | 169.00 | 0.051 | 0.089 |
| pH | 0.020 | 5.80 | 5.78 | 0.542 | 0.690 |
| Total.C | -0.122 | 2.03 | 2.15 | 0.003 | 0.007 |
| Total.N | -0.013 | 0.14 | 0.16 | 0.001 | 0.003 |
| chemical_fertilizer_5 | 0.568 | 3.80 | 3.23 | 0.001 | 0.003 |
| Exch.Al | 0.004 | 0.14 | 0.13 | 0.725 | 0.823 |
| compost_fertilizer_5 | 0.357 | 2.08 | 1.73 | 0.027 | 0.054 |
| legum_maincrop_5 | 0.095 | 0.28 | 0.19 | 0.11 | 0.154 |
| legum_intercrop_5 | -0.295 | 2.65 | 2.95 | 0.058 | 0.090 |
| logFert | 0.171 | 1.44 | 1.27 | 0.001 | 0.003 |
| dap.acre | -9.527 | 46.24 | 55.77 | 0.001 | 0.003 |
| can.acre | -12.420 | 39.16 | 51.58 | 0.001 | 0.003 |
| compost.acre | -69.499 | 351.15 | 420.64 | 0.764 | 0.823 |
write.csv(coefTable.append, file=paste(od, "ke psm coefficients - appendix.csv", sep = "/"),row.names = T)
# sort by the order Eric wants
t7order.a <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")
table7vars.a <- paste(t7order.a, collapse = "|")
table7.a <- coefTable.append[grep(table7vars.a, rownames(coefTable.append)), ]
table7.a <- table7.a[order(match(rownames(table7.a), t7order.a)),]
write.csv(table7.a, file=paste(od, "table7-appendix.csv", sep = "/"), row.names = T)
# 11/17 added lime
t8order.a <- c("chemical_fertilizer_5", "compost_fertilizer_5", "legum_maincrop_5",
"legum_intercrop_5")
table8vars.a <- paste(t8order.a, collapse = "|")
table8.a <- coefTable.append[grep(table8vars.a, rownames(coefTable.append)), ]
table8.a <- table8.a[order(match(rownames(table8.a), t8order.a)),]
write.csv(table8.a, file=paste(od, "table8-appendix.csv", sep = "/"), row.names = T)
#table 3
t9order.a <- c("dap.acre", "can.acre", "compost.acre")
table9vars.a <- paste(t9order.a, collapse = "|")
table9.a <- coefTable.append[grep(table9vars.a, rownames(coefTable.append)), ]
table9.a <- table9.a[order(match(rownames(table9.a), t9order.a)),]
write.csv(table9.a, file=paste(od, "table9-appendix.csv", sep = "/"))
thresh <- d %>% group_by(oaf) %>% dplyr::summarize(
count = n(),
ph = sum(pH<5.8),
carbon = sum(Total.C < 2),
nitrogen = sum(Total.N < 0.1),
calcium = sum(Ca < 720),
magnesium = sum(Mg < 100)
#aluminum = sum(ExAl)
) %>% mutate(
under.ph = paste(paste(round(ph/count,4)*100, "%", sep=""), " (", ph, ")", sep=""),
under.carbon = paste(paste(round(carbon/count,4)*100,"%", sep=""), " (", carbon, ")", sep=""),
under.nitrogen = paste(paste(round(nitrogen/count,4)*100, "%", sep=""), " (", nitrogen, ")", sep=""),
under.calcium = paste(paste(round(calcium/count,4)*100, "%", sep=""), " (", calcium, ")", sep=""),
under.mag = paste(paste(round(magnesium/count,4)*100,"%", sep=""), " (", magnesium, ")", sep="")
) %>% as.data.frame()
thresh <- thresh[, c("oaf", names(thresh)[grep("under", names(thresh))])]
thresh <- t(thresh)
colnames(thresh) = thresh[1, ] # the first row will be the header
colnames(thresh) = c("non-client", "client")
thresh = thresh[-1, ]
write.csv(thresh, file=paste(od, "table1_rw_thresholds.csv", sep = "/"), row.names = T)
tenureTab.add <- lapply(1:length(m), function(model){
dm <- as.data.frame(rbind(d[m[[model]][[1]]$index.treated,],
d[m[[model]][[1]]$index.control,]))
dm$client_tenure <- dm$oaf*dm$seasons_oaf
mod <- lm(as.formula(paste("dm[,psmList[[model]]$y] ~", "seasons_oaf + as.factor(SiteName)", sep ="")), data=dm)
return(mod)
})
modNames <- unlist(lapply(psmList, function(x){
return(x$y)
}))
plm.tenure <- function(x){
intercept = round(x$coefficients[[1]],3)
beta = round(x$coefficients[[2]],3)
int.pval = summary(x)$coefficients[1,4]
int.pval = ifelse(int.pval < 0.001, "< 0.001", round(as.numeric(int.pval),3))
beta.pval = summary(x)$coefficients[2,4]
beta.pval = ifelse(beta.pval < 0.001, "< 0.001", round(as.numeric(beta.pval),3))
res = data.frame(intercept, int.pval, beta, beta.pval, stringsAsFactors = F)
return(res)
}
tenure.reg <- do.call(rbind, lapply(tenureTab.add, function(x){
plm.tenure(x)
}))
rownames(tenure.reg) <- modNames
t14order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "chemical_fertilizer_5",
"logFert", "compost_fertilizer_5", "legum_maincrop_5", "n_season_fallow")
t14vars <- paste(t14order, collapse = "|")
tenure.reg <- tenure.reg[grep(t14vars,rownames(tenure.reg)),]
tenure.reg <- tenure.reg[order(match(row.names(tenure.reg), t14order)),]
write.csv(tenure.reg, file=paste(od, "table14.csv", sep = "/"), row.names=T)
Produce a simple map of where our observations are
if (!(exists("kenya"))){
# Only need to geocode once per session library(dismo)
kenya <- try(geocode("Kenya"))
# If the internet fails, use a local value
if (class(kenya) == "try-error") {
kenya <- ""
}
}
See here for more on using markerClusterOptions in leaflet.
In the map below, the larger green circles are One Acre Fund farmers and the smaller blue circles are control farmers.
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e)
pal <- colorNumeric(c("navy", "green"), domain=unique(ss$oaf))
map <- leaflet() %>% addTiles() %>%
setView(lng=kenya$longitude, lat=kenya$latitude, zoom=6) %>%
addCircleMarkers(lng=ss$lon, lat=ss$lat,
radius= ifelse(ss$oaf==1, 10,6),
color = pal(ss$oaf),
clusterOptions = markerClusterOptions(disableClusteringAtZoom=12, spiderfyOnMaxZoom=FALSE))
map
Notes: We have a lot of GPS points piled on the Bungoma office. We need to address the GPS collection practices that lead to incorrectly logged GPS.
crdref <- CRS('+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0')
e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e, proj4string = crdref)
ke <- raster::getData("GADM", country='KE', level=2, path = "/Users/mlowes/drive/optimized_agronomy/soil/soil_health_study/data") # the higher the number, the higher the resolution
#ext.rw.ss matches points with spatial polygons in ke
ext.ke.ss <- extract(ke[, "NAME_2"], ss)
## Loading required namespace: rgeos
ss$spatialname <- ext.ke.ss$NAME_2
fke <- fortify(ke, region="NAME_2")
ss.count <- ss@data %>% group_by(spatialname) %>% dplyr::summarize(
count = n()
) %>% as.data.frame()
table(ss.count$spatialname %in% fke$id)
##
## TRUE
## 60
plotReady <- dplyr::left_join(fke, ss.count, by=c("id"="spatialname"))
Tip: Zoom in on subset of full map and filling polygons with base R plot
library(RColorBrewer)
mapRes <- ggplot(plotReady, aes(x=long, y=lat, group=group)) + geom_path() +
geom_polygon(aes(fill=plotReady$count)) +
coord_map(xlim = c(33.5, 36),ylim = c(-2, 1.75)) +
scale_fill_gradientn(colours = brewer.pal(9,"Reds"), # define colors
name = "Sampling Density",
guide = guide_colorbar(legend.direction = "vertical")) +
theme_bw() +
labs(title="Kenya soil health study sampling density", x = "Longitude", y="Latitude")
print(mapRes)
Without ggplot
plotCount <- merge(ke, ss.count, by.x="NAME_2", by.y="spatialname")
#colfunc <- colorRampPalette("red")
#plot(plotCount, col=plotCount@data$count, xlim = c(35, 36),ylim = c(-2, 1.75))
pdf(paste(od, "kenya sampling density.pdf", sep = "/"), width=11, height=8)
print(mapRes)
dev.off()
## quartz_off_screen
## 2
–end